This Rmarkdown contains paragraphs taken from
Schubert, A.; von Streit, A. & Garschagen, M. (2024): Unravelling the capacity-action gap in flood risk adaptation. In: Natural Hazards and Earth System Sciences Discussion [preprint], https://doi.org/10.5194/nhess-2024-121.
Load the necessary libraries
library(here) # for file referencing
library(haven) # to work with the SPSS labbelled dataset
library(tidyverse) # for data manipulation
library(mice) # for data preparation
library(sandwich) # cluster-robust standard error
library(marginaleffects) # for AME calculation
library(DHARMa) # residual diagnostics for generalized linear (mixed) models
library(ggplot2) # to create forest plots
library(cowplot) # to arrange plots
Load the imputed dataset, drop observations with missing information on adaptation action from the data (multiple imputation, then deletion approach) and do necessary recodings.
# load data and rename dataset
load(here::here("./data/tidy/KARE_imp_inck.RData"))
KARE_MI <- KARE_imp
### MID (multiple imputation, then deletion):
# use Y to impute X, but drop obs with missing Y from the analysis (von Hippel 2007)
# create a long format dataset
KARE_MI_long <- complete(KARE_MI, action = "long", include = TRUE)
# drop obs with NA on Y
KARE_MI_long <- subset(KARE_MI_long, !(IDS %in% KARE_MI_long$IDS[is.na(KARE_MI_long$numadmeas)]))
# recode R75 from imputed R75a (tenants - polyreg) and R75b (owners - logreg)
KARE_MI_long$R75 <- ifelse(KARE_MI_long$R7 == "Miete", KARE_MI_long$R75a,
ifelse(KARE_MI_long$R75b == "Öffentliche Stellen (d.h. Bund, Länder oder Gemeinden)", 3, 1))
KARE_MI_long$R75 <- as_factor(KARE_MI_long$R75)
KARE_MI <- mice::as.mids(KARE_MI_long)
# n = 1,571 obs
### turn ordered factors into unordered to get pairwise comparisons
# instead of polynomial contrasts
KARE_MI$data$R18 <- factor(KARE_MI$data$R18,
ordered = F)
KARE_MI$data$R80a_1 <- factor(KARE_MI$data$R80a_1,
ordered = F)
KARE_MI$data$R92 <- factor(KARE_MI$data$R92,
ordered = F)
KARE_MI$data$R104 <- factor(KARE_MI$data$R104,
ordered = F)
KARE_MI$data$Modus <- factor(KARE_MI$data$Modus,
ordered = F)
### change reference levels
KARE_MI$data$Modus <- relevel(KARE_MI$data$Modus, ref = "2")
KARE_MI$data$R92 <- relevel(KARE_MI$data$R92, ref = "unsure/short-term")
KARE_MI$data$R18 <- relevel(KARE_MI$data$R18, ref = "Überhaupt nicht wahrscheinlich")
KARE_MI$data$R4a5 <- relevel(KARE_MI$data$R4a5, ref = "none")
KARE_MI$data$R7 <- relevel(KARE_MI$data$R7, ref = "Miete")
KARE_MI$data$R80a_1 <- relevel(KARE_MI$data$R80a_1, ref = "Rather no/no")
To explore whether a household adapts (yes/no), we run logistic regression models. For each model, assumptions were checked to ensure the validity and reliability of the results. To address the problem of unobserved heterogeneity in logistic and Poisson models, all effects are presented as average marginal effects (AME) (Mood 2010; Arel-Bundock et al. Forthcoming).
# fit logistic regression to each imputed data set
glm_multiimp <- with(KARE_MI,
glm(admeas ~ R104 + R109 + R71 + # generic capacity
R7 + R91 + R92 +
R90_1 + R90_5 +
R9 + R18 + R4a5 + R75 + # flood-specific capacity
R63_6 + R80a_1 + R63_1 +
R63_3 + R63_2 + R63_4 +
R97 + R98 + R99 + R106 + # CVs: gender, age, mig, hhsize
R69 + R70 + # CVs: house
Modus, # Survey design
family = binomial("logit")))
# calculate cluster-robust AME
marginal <- avg_slopes(glm_multiimp, vcov = ~town)
marginal$order <- c(32, 33, 1, 2, 28, 3, 12, 13, 14, 15, 16, 21,
23, 22, 24, 19, 30, 29, 5, 31, 4, 17, 18, 20, 11,
9, 10, 6, 8, 7, 25, 26, 27)
marginal <- marginal %>%
mutate(across(c(3:5, 10:13), round, 4))
b_log_AME <- c(NA, marginal$estimate)
se_log_AME <- c(NA, marginal$std.error)
p_log_AME <- c(NA, marginal$p.value)
print(marginal[order(marginal$order, decreasing = F),], nrows = 40, style = "tinytable")
| Term | Contrast | Estimate | Std. Error | t | Pr(>|t|) | S | CI low | CI high | Df |
|---|---|---|---|---|---|---|---|---|---|
| Columns: term, contrast, estimate, std.error, s.value, predicted_lo, predicted_hi, predicted, df, statistic, p.value, conf.low, conf.high, order | |||||||||
| R104 | mean(intermediate secondary) - mean(no / lower secondary) | -0.0234 | 0.0287 | -0.8163 | 0.4144 | 1.3 | -0.0797 | 0.0328 | 6111 |
| R104 | mean(upper secondary) - mean(no / lower secondary) | -0.0172 | 0.0303 | -0.5687 | 0.5696 | 0.8 | -0.0767 | 0.0422 | 8232 |
| R109 | mean(dY/dX) | 0.0055 | 0.0053 | 1.0521 | 0.2930 | 1.8 | -0.0048 | 0.0159 | 1118 |
| R71 | mean(dY/dX) | 0.0001 | 0.0002 | 0.3255 | 0.7448 | 0.4 | -0.0003 | 0.0004 | 13398 |
| R7 | mean(Eigentum) - mean(Miete) | 0.2417 | 0.0461 | 5.2398 | <0.001 | 22.6 | 0.1513 | 0.3321 | 223162 |
| R91 | mean(dY/dX) | -0.0002 | 0.0006 | -0.3621 | 0.7173 | 0.5 | -0.0014 | 0.0009 | 409031 |
| R92 | mean(medium-term) - mean(unsure/short-term) | 0.0056 | 0.0262 | 0.2124 | 0.8318 | 0.3 | -0.0458 | 0.0570 | 32853 |
| R92 | mean(long-term) - mean(unsure/short-term) | 0.0110 | 0.0263 | 0.4169 | 0.6767 | 0.6 | -0.0406 | 0.0625 | 203058 |
| R90_1 | mean(dY/dX) | 0.0196 | 0.0080 | 2.4378 | 0.0148 | 6.1 | 0.0038 | 0.0353 | 31744 |
| R90_5 | mean(dY/dX) | -0.0144 | 0.0082 | -1.7647 | 0.0776 | 3.7 | -0.0305 | 0.0016 | 32887 |
| R9 | mean(dY/dX) | 0.0059 | 0.0065 | 0.9075 | 0.3642 | 1.5 | -0.0069 | 0.0188 | 171512 |
| R18 | mean(Eher nicht wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.0886 | 0.0314 | 2.8202 | 0.0048 | 7.7 | 0.0270 | 0.1502 | 452893 |
| R18 | mean(Eher wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.1055 | 0.0428 | 2.4664 | 0.0137 | 6.2 | 0.0217 | 0.1893 | 297941 |
| R18 | mean(Sehr wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.1735 | 0.0433 | 4.0117 | <0.001 | 14.0 | 0.0888 | 0.2583 | 87527 |
| R4a5 | mean(damage) - mean(none) | 0.1152 | 0.0231 | 4.9865 | <0.001 | 20.6 | 0.0700 | 0.1605 | 4500732 |
| R4a5 | mean(experience) - mean(none) | -0.0092 | 0.0266 | -0.3468 | 0.7288 | 0.5 | -0.0613 | 0.0429 | 3328160 |
| R75 | mean(2) - mean(1) | -0.0415 | 0.0417 | -0.9951 | 0.3197 | 1.6 | -0.1231 | 0.0402 | 100390 |
| R75 | mean(3) - mean(1) | 0.0062 | 0.0236 | 0.2630 | 0.7925 | 0.3 | -0.0400 | 0.0524 | 15732 |
| R63_6 | mean(dY/dX) | -0.0102 | 0.0073 | -1.4056 | 0.1600 | 2.6 | -0.0244 | 0.0040 | 2047 |
| R80a_1 | mean(Rather yes/yes) - mean(Rather no/no) | 0.0014 | 0.0139 | 0.0995 | 0.9207 | 0.1 | -0.0259 | 0.0286 | 11443 |
| R63_1 | mean(dY/dX) | -0.0018 | 0.0064 | -0.2742 | 0.7839 | 0.4 | -0.0143 | 0.0108 | 31562 |
| R63_3 | mean(dY/dX) | 0.0110 | 0.0053 | 2.1011 | 0.0357 | 4.8 | 0.0007 | 0.0213 | 3758 |
| R63_2 | mean(dY/dX) | 0.0378 | 0.0066 | 5.6836 | <0.001 | 26.1 | 0.0248 | 0.0508 | 11835 |
| R63_4 | mean(dY/dX) | -0.0040 | 0.0059 | -0.6767 | 0.4987 | 1.0 | -0.0156 | 0.0076 | 2782 |
| R97 | mean(Weiblich) - mean(Männlich) | 0.0035 | 0.0205 | 0.1723 | 0.8632 | 0.2 | -0.0367 | 0.0437 | 126435 |
| R98 | mean(dY/dX) | 0.0001 | 0.0008 | 0.1103 | 0.9122 | 0.1 | -0.0014 | 0.0016 | 46571 |
| R99 | mean(1) - mean(0) | 0.0316 | 0.0521 | 0.6067 | 0.5440 | 0.9 | -0.0704 | 0.1336 | 1144908 |
| R106 | mean(dY/dX) | 0.0051 | 0.0089 | 0.5751 | 0.5652 | 0.8 | -0.0124 | 0.0226 | 17491 |
| R69 | mean(Duplexe/terraced house) - mean(Single family home) | -0.0217 | 0.0225 | -0.9626 | 0.3358 | 1.6 | -0.0657 | 0.0224 | 178739 |
| R69 | mean(Apartment building) - mean(Single family home) | -0.0391 | 0.0240 | -1.6251 | 0.1041 | 3.3 | -0.0862 | 0.0081 | 194062 |
| R70 | mean(dY/dX) | 0.0001 | 0.0003 | 0.3861 | 0.6994 | 0.5 | -0.0005 | 0.0007 | 3461 |
| Modus | mean(1) - mean(2) | 0.0471 | 0.0197 | 2.3938 | 0.0167 | 5.9 | 0.0085 | 0.0857 | 249749 |
| Modus | mean(3) - mean(2) | -0.0350 | 0.0343 | -1.0191 | 0.3081 | 1.7 | -0.1023 | 0.0323 | 1760684 |
### Goodness of fit measures
#R^2
Nagelkerke_log_all <- numeric(30)
for (i in 1:30) {
Nagelkerke_log_all_res <- c(DescTools::PseudoR2(glm_multiimp$analyses[[i]], which = "Nagelkerke"))
Nagelkerke_log_all[i] <- Nagelkerke_log_all_res
}
median(Nagelkerke_log_all)
## [1] 0.4166494
# BIC
BIC_log_all <- numeric(30)
for (i in 1:30) {
BIC_log_all_res <- c(BIC(glm_multiimp$analyses[[i]]))
BIC_log_all[i] <- BIC_log_all_res
}
median(BIC_log_all)
## [1] 1358.973
We checked the four logistic regression assumptions: 1) linearity, 2) independence, 3) no multicollinearity, and 4) exogeneity. Predictors are not affected by multicollinearity (variance inflation factor < 2). The violation of the independence assumptions is accounted for in two ways. Firstly, we estimate cluster-robust standard errors at the municipal level to account for the fact that respondents from the same municipality might be more similar to each other in terms of adaptive capacity and action. Secondly, the exogenous sample selection is removed by conditioning on the characteristics which are over- and underrepresented (e.g. age, income, education) (Wooldridge 2013, p. 325). To fulfil the exogeneity assumption and eliminate spurious correlations, additional variables such as house characteristics and survey mode are controlled for.
# 1) Linearity
# not useful to plot the raw residuals, examine binned residual plots
arm::binnedplot(fitted(glm_multiimp$analyses[[7]]),
residuals(glm_multiimp$analyses[[7]], type = "response"),
nclass = NULL,
xlab = "Expected Values",
ylab = "Average residual",
main = "Binned residual plot",
cex.pts = 0.8,
col.pts = 1,
col.int = "gray")
KARE_data6 <- complete(KARE_MI, action=6)
arm::binnedplot(KARE_data6$R109,
residuals(glm_multiimp$analyses[[6]], type = "response"),
nclass = NULL,
xlab = "Expected Values",
ylab = "Average residual",
main = "Binned residual plot",
cex.pts = 0.8,
col.pts = 1,
col.int = "gray")
# 2) Independence
# --> Respondents from the same town might be more similar to one another on the
# outcome measure, on average, than they are with respondents across towns
# plot residuals vs spatial variable
glm_data6 <- glm(data = KARE_data6,
admeas ~ R104 + R109 + R71 + # generic capacity
R7 + R91 + R92 +
R90_1 + R90_5 +
R9 + R18 + R4a5 + R75 + # flood-specific capacity
R63_6 + R80a_1 + R63_1 +
R63_3 + R63_2 + R63_4 +
R97 + R98 + R99 + R106 + # CVs: gender, age, mig, hhsize
R69 + R70 + # CVs: house
Modus, # Survey design
family = binomial("logit"))
# by town
ggplot(data = data.frame(x = KARE_data6$town, y = glm_data6$residuals), aes(x = x, y = y)) +
geom_boxplot() +
coord_cartesian(ylim = c(-20, 20))
# some patterns --> use cluster-robust SEs
# maybe also due to small cluster sizes (e.g. 2, 5, 10 respondents)
# 3) No multicollinearity
car::vif(glm_multiimp$analyses[[10]])
## GVIF Df GVIF^(1/(2*Df))
## R104 1.276263 2 1.062882
## R109 1.397921 1 1.182337
## R71 1.493679 1 1.222162
## R7 3.492387 1 1.868793
## R91 1.588713 1 1.260442
## R92 1.368422 2 1.081571
## R90_1 1.579022 1 1.256592
## R90_5 1.653925 1 1.286050
## R9 1.163867 1 1.078826
## R18 1.606683 3 1.082235
## R4a5 1.219519 2 1.050866
## R75 3.224814 2 1.340066
## R63_6 1.213632 1 1.101650
## R80a_1 1.102197 1 1.049856
## R63_1 1.205709 1 1.098048
## R63_3 1.202659 1 1.096658
## R63_2 1.215955 1 1.102703
## R63_4 1.152534 1 1.073561
## R97 1.132500 1 1.064190
## R98 1.713783 1 1.309115
## R99 1.044298 1 1.021909
## R106 1.498910 1 1.224300
## R69 1.402234 2 1.088191
## R70 1.114329 1 1.055618
## Modus 1.522843 2 1.110871
# GVIF (1/(2*Df) < 2 for all --> no multicollinearity
# 4) Exogeneity
# --> all relevant CVs are included in the model
# fit logistic regression to each imputed data set
glm_multiimp_own <- with(KARE_MI,
glm(admeas ~ R104 + R109 + R71 + # generic capacity
R91 + R92 +
R90_1 + R90_5 +
R9 + R18 + R4a5 + R75 + # flood-specific capacity
R63_6 + R80a_1 + R63_1 +
R63_3 + R63_2 + R63_4 +
R97 + R98 + R99 + R106 + # CVs: gender, age, mig, hhsize
R69 + R70 + # CVs: house
Modus, # Survey design
family = binomial("logit"),
subset = (R7 == "Eigentum")))
# calculate AME
marginal_own <- avg_slopes(glm_multiimp_own, vcov = ~town)
marginal_own$order <- c(32, 33, 1, 2, 28, 3, 12, 13, 14, 15, 16, 21,
23, 22, 24, 19, 30, 29, 31, 4, 18, 20, 11,
9, 10, 6, 8, 7, 25, 26, 27)
marginal_own <- marginal_own %>%
mutate(across(c(3:5, 10:13), round, 4))
b_log_AME_own <- c(NA, marginal_own$estimate)
se_log_AME_own <- c(NA, marginal_own$std.error)
p_log_AME_own <- c(NA, marginal_own$p.value)
print(marginal_own[order(marginal_own$order, decreasing = F),], nrows = 40, digits = 4, style = "tinytable")
| Term | Contrast | Estimate | Std. Error | t | Pr(>|t|) | S | CI low | CI high | Df |
|---|---|---|---|---|---|---|---|---|---|
| Columns: term, contrast, estimate, std.error, s.value, predicted_lo, predicted_hi, predicted, df, statistic, p.value, conf.low, conf.high, order | |||||||||
| R104 | mean(intermediate secondary) - mean(no / lower secondary) | 0.0010 | 0.0294 | 0.0349 | 0.9722 | 0.0 | -0.0566 | 0.0586 | 6988.0 |
| R104 | mean(upper secondary) - mean(no / lower secondary) | 0.0049 | 0.0306 | 0.1610 | 0.8721 | 0.2 | -0.0551 | 0.0650 | 5536.5 |
| R109 | mean(dY/dX) | 0.0073 | 0.0055 | 1.3373 | 0.1817 | 2.5 | -0.0034 | 0.0181 | 552.9 |
| R71 | mean(dY/dX) | 0.0001 | 0.0002 | 0.4176 | 0.6763 | 0.6 | -0.0002 | 0.0004 | 7382.6 |
| R91 | mean(dY/dX) | -0.0001 | 0.0006 | -0.2333 | 0.8156 | 0.3 | -0.0013 | 0.0010 | 1102223.8 |
| R92 | mean(medium-term) - mean(unsure/short-term) | 0.0257 | 0.0366 | 0.7032 | 0.4819 | 1.1 | -0.0460 | 0.0974 | 1378075.1 |
| R92 | mean(long-term) - mean(unsure/short-term) | 0.0011 | 0.0247 | 0.0451 | 0.9640 | 0.1 | -0.0474 | 0.0496 | 460905.6 |
| R90_1 | mean(dY/dX) | 0.0151 | 0.0066 | 2.3080 | 0.0210 | 5.6 | 0.0023 | 0.0280 | 24194.2 |
| R90_5 | mean(dY/dX) | -0.0021 | 0.0074 | -0.2836 | 0.7767 | 0.4 | -0.0165 | 0.0123 | 37201.6 |
| R9 | mean(dY/dX) | 0.0042 | 0.0064 | 0.6552 | 0.5124 | 1.0 | -0.0084 | 0.0168 | 147520.5 |
| R18 | mean(Eher nicht wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.0622 | 0.0292 | 2.1315 | 0.0331 | 4.9 | 0.0050 | 0.1194 | 192797.2 |
| R18 | mean(Eher wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.0433 | 0.0365 | 1.1866 | 0.2354 | 2.1 | -0.0282 | 0.1149 | 67149.1 |
| R18 | mean(Sehr wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.1180 | 0.0372 | 3.1704 | 0.0015 | 9.4 | 0.0450 | 0.1909 | 25940.2 |
| R4a5 | mean(damage) - mean(none) | 0.0549 | 0.0218 | 2.5206 | 0.0117 | 6.4 | 0.0122 | 0.0976 | 4537213.8 |
| R4a5 | mean(experience) - mean(none) | 0.0166 | 0.0224 | 0.7409 | 0.4588 | 1.1 | -0.0273 | 0.0605 | 1222565.8 |
| R75 | mean(3) - mean(1) | -0.0048 | 0.0193 | -0.2503 | 0.8024 | 0.3 | -0.0427 | 0.0330 | 16884.7 |
| R63_6 | mean(dY/dX) | -0.0050 | 0.0075 | -0.6678 | 0.5044 | 1.0 | -0.0197 | 0.0097 | 1032.9 |
| R80a_1 | mean(Rather yes/yes) - mean(Rather no/no) | 0.0007 | 0.0157 | 0.0446 | 0.9645 | 0.1 | -0.0301 | 0.0315 | 15753.1 |
| R63_1 | mean(dY/dX) | -0.0047 | 0.0067 | -0.7002 | 0.4838 | 1.0 | -0.0179 | 0.0085 | 12391.0 |
| R63_3 | mean(dY/dX) | 0.0038 | 0.0062 | 0.6038 | 0.5460 | 0.9 | -0.0084 | 0.0159 | 2632.6 |
| R63_2 | mean(dY/dX) | 0.0260 | 0.0079 | 3.2819 | 0.0010 | 9.9 | 0.0105 | 0.0415 | 7787.8 |
| R63_4 | mean(dY/dX) | -0.0044 | 0.0058 | -0.7488 | 0.4541 | 1.1 | -0.0158 | 0.0071 | 1418.6 |
| R97 | mean(Weiblich) - mean(Männlich) | 0.0152 | 0.0205 | 0.7392 | 0.4598 | 1.1 | -0.0251 | 0.0555 | 337424.9 |
| R98 | mean(dY/dX) | 0.0010 | 0.0007 | 1.3565 | 0.1750 | 2.5 | -0.0004 | 0.0024 | 19893.8 |
| R99 | mean(1) - mean(0) | 0.0348 | 0.0578 | 0.6018 | 0.5473 | 0.9 | -0.0785 | 0.1481 | 1230360.2 |
| R106 | mean(dY/dX) | 0.0082 | 0.0098 | 0.8332 | 0.4047 | 1.3 | -0.0110 | 0.0274 | 11809.6 |
| R69 | mean(Duplexe/terraced house) - mean(Single family home) | -0.0187 | 0.0199 | -0.9397 | 0.3474 | 1.5 | -0.0578 | 0.0203 | 313111.1 |
| R69 | mean(Apartment building) - mean(Single family home) | -0.0212 | 0.0207 | -1.0245 | 0.3056 | 1.7 | -0.0618 | 0.0194 | 87226.6 |
| R70 | mean(dY/dX) | -0.0002 | 0.0003 | -0.8374 | 0.4024 | 1.3 | -0.0007 | 0.0003 | 15376.3 |
| Modus | mean(1) - mean(2) | 0.0362 | 0.0190 | 1.9084 | 0.0563 | 4.1 | -0.0010 | 0.0734 | 182788.3 |
| Modus | mean(3) - mean(2) | 0.0021 | 0.0372 | 0.0559 | 0.9554 | 0.1 | -0.0709 | 0.0751 | 539593.6 |
### Goodness of fit measures
#R^2
Nagelkerke_log_own <- numeric(30)
for (i in 1:30) {
Nagelkerke_log_own_res <- c(DescTools::PseudoR2(glm_multiimp_own$analyses[[i]], which = "Nagelkerke"))
Nagelkerke_log_own[i] <- Nagelkerke_log_own_res
}
median(Nagelkerke_log_own)
## [1] 0.1663847
# BIC
BIC_log_own <- numeric(30)
for (i in 1:30) {
BIC_log_own_res <- c(BIC(glm_multiimp_own$analyses[[i]]))
BIC_log_own[i] <- BIC_log_own_res
}
median(BIC_log_own)
## [1] 850.9374
# fit logistic regression to each imputed data set
glm_multiimp_ten <- with(KARE_MI,
glm(admeas ~ R104 + R109 + R71 + # generic capacity
R91 + R92 +
R90_1 + R90_5 +
R9 + R18 + R4a5 + R75 + # flood-specific capacity
R63_6 + R80a_1 + R63_1 +
R63_3 + R63_2 + R63_4 +
R97 + R98 + R99 + R106 + # CVs: gender, age, mig, hhsize
R69 + R70 + # CVs: house
Modus, # Survey design
family = binomial("logit"),
subset = (R7 == "Miete")))
# calculate AME
marginal_ten <- avg_slopes(glm_multiimp_ten, vcov = ~town)
marginal_ten$order <- c(32, 33, 1, 2, 28, 3, 12, 13, 14, 15, 16, 21,
23, 22, 24, 19, 30, 29, 31, 4, 17, 18, 20, 11,
9, 10, 6, 8, 7, 25, 26, 27)
marginal_ten <- marginal_ten %>%
mutate(across(c(3:5, 10:13), round, 4))
b_log_AME_ten <- c(NA, marginal_ten$estimate)
se_log_AME_ten <- c(NA, marginal_ten$std.error)
p_log_AME_ten <- c(NA, marginal_ten$p.value)
print(marginal_ten[order(marginal_ten$order, decreasing = F),], nrows = 40, digits = 4, style = "tinytable")
| Term | Contrast | Estimate | Std. Error | t | Pr(>|t|) | S | CI low | CI high | Df |
|---|---|---|---|---|---|---|---|---|---|
| Columns: term, contrast, estimate, std.error, s.value, predicted_lo, predicted_hi, predicted, df, statistic, p.value, conf.low, conf.high, order | |||||||||
| R104 | mean(intermediate secondary) - mean(no / lower secondary) | -0.1004 | 0.0861 | -1.1658 | 0.2437 | 2.0 | -0.2692 | 0.0684 | 25478 |
| R104 | mean(upper secondary) - mean(no / lower secondary) | -0.0940 | 0.0839 | -1.1203 | 0.2626 | 1.9 | -0.2584 | 0.0704 | 29048 |
| R109 | mean(dY/dX) | 0.0014 | 0.0134 | 0.1026 | 0.9183 | 0.1 | -0.0248 | 0.0275 | 2977 |
| R71 | mean(dY/dX) | -0.0002 | 0.0008 | -0.2003 | 0.8412 | 0.2 | -0.0018 | 0.0015 | 5759 |
| R91 | mean(dY/dX) | -0.0009 | 0.0012 | -0.7345 | 0.4626 | 1.1 | -0.0034 | 0.0015 | 32521 |
| R92 | mean(medium-term) - mean(unsure/short-term) | -0.0185 | 0.0583 | -0.3168 | 0.7514 | 0.4 | -0.1327 | 0.0958 | 19919 |
| R92 | mean(long-term) - mean(unsure/short-term) | 0.0356 | 0.0603 | 0.5911 | 0.5545 | 0.9 | -0.0825 | 0.1538 | 79283 |
| R90_1 | mean(dY/dX) | 0.0379 | 0.0192 | 1.9746 | 0.0483 | 4.4 | 0.0003 | 0.0756 | 17982 |
| R90_5 | mean(dY/dX) | -0.0446 | 0.0239 | -1.8646 | 0.0623 | 4.0 | -0.0916 | 0.0023 | 33723 |
| R9 | mean(dY/dX) | 0.0133 | 0.0142 | 0.9349 | 0.3498 | 1.5 | -0.0146 | 0.0412 | 43215 |
| R18 | mean(Eher nicht wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.1341 | 0.0805 | 1.6653 | 0.0959 | 3.4 | -0.0237 | 0.2919 | 696755 |
| R18 | mean(Eher wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.2363 | 0.0928 | 2.5468 | 0.0109 | 6.5 | 0.0545 | 0.4182 | 365419 |
| R18 | mean(Sehr wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.3610 | 0.1154 | 3.1287 | 0.0018 | 9.2 | 0.1349 | 0.5872 | 150216 |
| R4a5 | mean(damage) - mean(none) | 0.3391 | 0.0644 | 5.2660 | <0.001 | 22.8 | 0.2129 | 0.4653 | 402250 |
| R4a5 | mean(experience) - mean(none) | -0.0834 | 0.0642 | -1.2976 | 0.1944 | 2.4 | -0.2092 | 0.0425 | 2289868 |
| R75 | mean(2) - mean(1) | 0.0844 | 0.0969 | 0.8712 | 0.3837 | 1.4 | -0.1055 | 0.2743 | 99287 |
| R75 | mean(3) - mean(1) | 0.2016 | 0.1027 | 1.9624 | 0.0497 | 4.3 | 0.0002 | 0.4029 | 154944 |
| R63_6 | mean(dY/dX) | -0.0191 | 0.0166 | -1.1449 | 0.2523 | 2.0 | -0.0517 | 0.0136 | 14982 |
| R80a_1 | mean(Rather yes/yes) - mean(Rather no/no) | 0.0070 | 0.0386 | 0.1816 | 0.8559 | 0.2 | -0.0687 | 0.0828 | 10957 |
| R63_1 | mean(dY/dX) | 0.0114 | 0.0156 | 0.7286 | 0.4663 | 1.1 | -0.0192 | 0.0419 | 18543 |
| R63_3 | mean(dY/dX) | 0.0321 | 0.0144 | 2.2184 | 0.0265 | 5.2 | 0.0037 | 0.0604 | 58229 |
| R63_2 | mean(dY/dX) | 0.0694 | 0.0121 | 5.7590 | <0.001 | 26.8 | 0.0458 | 0.0930 | 44171 |
| R63_4 | mean(dY/dX) | -0.0042 | 0.0144 | -0.2930 | 0.7695 | 0.4 | -0.0324 | 0.0240 | 8808 |
| R97 | mean(Weiblich) - mean(Männlich) | -0.0213 | 0.0464 | -0.4592 | 0.6461 | 0.6 | -0.1122 | 0.0696 | 41489 |
| R98 | mean(dY/dX) | -0.0019 | 0.0016 | -1.2086 | 0.2268 | 2.1 | -0.0050 | 0.0012 | 31480 |
| R99 | mean(1) - mean(0) | 0.0342 | 0.1249 | 0.2737 | 0.7843 | 0.4 | -0.2107 | 0.2791 | 646111 |
| R106 | mean(dY/dX) | -0.0021 | 0.0227 | -0.0935 | 0.9255 | 0.1 | -0.0465 | 0.0423 | 4115 |
| R69 | mean(Duplexe/terraced house) - mean(Single family home) | -0.0107 | 0.0875 | -0.1225 | 0.9025 | 0.1 | -0.1822 | 0.1608 | 330228 |
| R69 | mean(Apartment building) - mean(Single family home) | -0.0847 | 0.0799 | -1.0608 | 0.2888 | 1.8 | -0.2412 | 0.0718 | 274246 |
| R70 | mean(dY/dX) | 0.0009 | 0.0007 | 1.2592 | 0.2084 | 2.3 | -0.0005 | 0.0024 | 705 |
| Modus | mean(1) - mean(2) | 0.0910 | 0.0616 | 1.4771 | 0.1396 | 2.8 | -0.0297 | 0.2117 | 187922 |
| Modus | mean(3) - mean(2) | -0.1229 | 0.0575 | -2.1373 | 0.0326 | 4.9 | -0.2355 | -0.0102 | 168471 |
### Goodness of fit measures
#R^2
Nagelkerke_log_ten <- numeric(30)
for (i in 1:30) {
Nagelkerke_log_ten_res <- c(DescTools::PseudoR2(glm_multiimp_ten$analyses[[i]], which = "Nagelkerke"))
Nagelkerke_log_ten[i] <- Nagelkerke_log_ten_res
}
median(Nagelkerke_log_ten)
## [1] 0.3427274
# BIC
BIC_log_ten <- numeric(30)
for (i in 1:30) {
BIC_log_ten_res <- c(BIC(glm_multiimp_ten$analyses[[i]]))
BIC_log_ten[i] <- BIC_log_ten_res
}
median(BIC_log_ten)
## [1] 649.3526
Create forest plots for the property owners’ and tenants’ results
# label variables
marginal_own <- marginal_own %>%
mutate(label = c("Education - intermediate secondary", "Education - upper secondary", "Income",
"Living area", "Duration of residence","Place attachment - medium-term", "Place attachment - long-term",
"Social network", "Social cohesion", "Future risk perception",
"Risk perception - not likely", "Risk perception - likely", "Risk perception - very likely",
"Previous experience - damage", "Previous experience - experience",
"Main responsibility state", "Expecation in authorities", "Trust in authorities - yes",
"Public protection is sufficient", "Self-efficacy", "Motivation",
"Competing concerns",
NA, NA, NA, NA, NA, NA, NA, NA, NA ),
.after = term)
marginal_ten <- marginal_ten %>%
mutate(label = c("Education - intermediate secondary", "Education - upper secondary", "Income",
"Living area", "Duration of residence","Place attachment - medium-term", "Place attachment - long-term",
"Social network", "Social cohesion", "Future risk perception",
"Risk perception - not likely", "Risk perception - likely", "Risk perception - very likely",
"Previous experience - damage", "Previous experience - experience", "Main responsibility landlord",
"Main responsibility state", "Expecation in authorities", "Trust in authorities - yes",
"Public protection is sufficient", "Self-efficacy", "Motivation",
"Competing concerns",
NA, NA, NA, NA, NA, NA, NA, NA, NA ),
.after = term)
# drop control variables
marginal_own2 <- marginal_own %>% drop_na(label)
marginal_ten2 <- marginal_ten %>% drop_na(label)
# merge results owners & tenants into one dataframe
marginal_own_ten <- merge(marginal_ten2, marginal_own2, by = "label", all = T)
marginal_own_ten <- marginal_own_ten[order(marginal_own_ten$estimate.y, decreasing = TRUE),]
# for plotting: add value for missing landlord which is not within plotted boundaries
marginal_own_ten$estimate.y[is.na(marginal_own_ten$estimate.y)] <- -2
# change colors
colors_log <- c("#588BAE", "#588BAE", "#588BAE", "#588BAE","#588BAE",
"black", "#588BAE", "black", "black", "black",
"#588BAE", "#588BAE", "black", "black", "#588BAE",
"black", "black", "black", "#588BAE","#588BAE",
"#588BAE", "#588BAE", "#588BAE")
colors_log_label <- c("#588BAE", "#588BAE", "#588BAE", "#588BAE", "#588BAE",
"black", "black", "black", "#588BAE", "black",
"black", "#588BAE", "#588BAE", "black", "black",
"black", "#588BAE", "black", "#588BAE", "#588BAE",
"#588BAE", "#588BAE","#588BAE")
# add source label
lab_cap <- expression(paste("Source: own calculation, data from KARE household survey 2022 (",
n[Owners], " = 1,157; ", n[Tenants], " = 414)"))
# plot owners
log_own <- ggplot(marginal_own_ten, aes(y = reorder(label, estimate.y), x = estimate.y, xmin = conf.low.y, xmax = conf.high.y)) +
geom_vline(xintercept = 0, linetype="solid",
color = "#758DA3", size=0.5) +
geom_pointrange(color = colors_log) +
labs(x ="AME", y = "",
title = "Owners", caption = " ") +
xlim(-0.3, 0.6) +
theme_minimal() +
theme(axis.text.y = element_text(colour = colors_log_label, size = 14),
plot.title = element_text(size = 15, hjust = 0.5, face = "bold"))
# plot tenants
log_ten <- ggplot(marginal_own_ten, aes(y = reorder(label, estimate.y), x = estimate.x, xmin = conf.low.x, xmax = conf.high.x)) +
geom_vline(xintercept = 0, linetype="solid",
color = "#758DA3", size=0.5) +
geom_pointrange(color = colors_log) +
labs(x ="AME", y = "",
title = "Tenants", caption = lab_cap) +
xlim(-0.3, 0.6) +
theme_minimal() +
theme(axis.text.y = element_blank(),
plot.title = element_text(size = 15, hjust = 0.5, face = "bold"))
# create title
title_log <- ggdraw() +
draw_label(
"Effect sizes of adaptive capacity indicators explaing household adaptation (yes/no)",
fontface = 'bold', size = 16,x = 0, hjust = 0) +
theme(plot.margin = margin(0, 0, 0, 7))
plot_log <- plot_grid(log_own, log_ten, rel_widths = c(2.1,0.9))
plot_grid(title_log, plot_log, ncol = 1, rel_heights = c(0.1, 1))
To explore which adaptive capacity indicators drive the number of implemented pluvial flood adaptation measures, we use Poisson regression models. Linear regression led to similar findings; however models suffered from heteroscedasticity. Given thar the number of implemented measures is discrete count data, we decided to rather estimate Poisson models. We checked the assumptions for each model seperately, all models were neither overdispersed nor zero-inflated. The effects are presented as average marginal effects (AME) (Mood 2010; Arel-Bundock et al. Forthcoming).
# fit poisson regression to each imputed data set
pois_multiimp <- with(KARE_MI,
glm(numadmeas ~ R104 + R109 + R71 + # generic capacity
R7 + R91 + R92 +
R90_1 + R90_5 +
R9 + R18 + R4a5 + R75 + # flood-specific capacity
R63_6 + R80a_1 + R63_1 +
R63_3 + R63_2 + R63_4 +
R97 + R98 + R99 + R106 + # CVs: gender, age, mig, hhsize
R69 + R70 + # CVs: house
Modus, # Survey design
family = 'poisson'))
# calculate cluster-robust AME
marginal_pois <- avg_slopes(pois_multiimp, vcov = ~town)
marginal_pois$order <- c(32, 33, 1, 2, 28, 3, 12, 13, 14, 15, 16, 21,
23, 22, 24, 19, 30, 29, 5, 31, 4, 17, 18, 20, 11,
9, 10, 6, 8, 7, 25, 26, 27)
marginal_pois <- marginal_pois %>%
mutate(across(c(3:5, 10:13), round, 4))
b_pois_AME <- c(NA, marginal_pois$estimate)
se_pois_AME <- c(NA, marginal_pois$std.error)
p_pois_AME <- c(NA, marginal_pois$p.value)
print(marginal_pois[order(marginal_pois$order, decreasing = F),], nrows = 40, style = "tinytable")
| Term | Contrast | Estimate | Std. Error | t | Pr(>|t|) | S | CI low | CI high | Df |
|---|---|---|---|---|---|---|---|---|---|
| Columns: term, contrast, estimate, std.error, s.value, predicted_lo, predicted_hi, predicted, df, statistic, p.value, conf.low, conf.high, order | |||||||||
| R104 | mean(intermediate secondary) - mean(no / lower secondary) | 0.3863 | 0.1292 | 2.990 | 0.0028 | 8.5 | 0.1331 | 0.6395 | 22996 |
| R104 | mean(upper secondary) - mean(no / lower secondary) | 0.2646 | 0.1168 | 2.266 | 0.0235 | 5.4 | 0.0357 | 0.4935 | 18928 |
| R109 | mean(dY/dX) | -0.0305 | 0.0163 | -1.870 | 0.0617 | 4.0 | -0.0625 | 0.0015 | 1175 |
| R71 | mean(dY/dX) | 0.0005 | 0.0005 | 0.963 | 0.3356 | 1.6 | -0.0005 | 0.0014 | 5659 |
| R7 | mean(Eigentum) - mean(Miete) | 1.4551 | 0.1320 | 11.027 | <0.001 | 91.5 | 1.1965 | 1.7137 | 179573 |
| R91 | mean(dY/dX) | -0.0058 | 0.0022 | -2.652 | 0.0080 | 7.0 | -0.0101 | -0.0015 | 84292 |
| R92 | mean(medium-term) - mean(unsure/short-term) | 0.1780 | 0.1641 | 1.085 | 0.2779 | 1.8 | -0.1436 | 0.4997 | 275836 |
| R92 | mean(long-term) - mean(unsure/short-term) | 0.0392 | 0.1326 | 0.296 | 0.7673 | 0.4 | -0.2207 | 0.2991 | 879276 |
| R90_1 | mean(dY/dX) | 0.1007 | 0.0437 | 2.306 | 0.0211 | 5.6 | 0.0151 | 0.1864 | 115267 |
| R90_5 | mean(dY/dX) | 0.0479 | 0.0362 | 1.323 | 0.1857 | 2.4 | -0.0230 | 0.1189 | 34055 |
| R9 | mean(dY/dX) | 0.0793 | 0.0293 | 2.707 | 0.0068 | 7.2 | 0.0219 | 0.1367 | 110509 |
| R18 | mean(Eher nicht wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.5763 | 0.1125 | 5.121 | <0.001 | 21.6 | 0.3557 | 0.7969 | 973443 |
| R18 | mean(Eher wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.6105 | 0.1306 | 4.675 | <0.001 | 18.4 | 0.3545 | 0.8665 | 322478 |
| R18 | mean(Sehr wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.7027 | 0.1551 | 4.532 | <0.001 | 17.4 | 0.3988 | 1.0066 | 173823 |
| R4a5 | mean(damage) - mean(none) | 0.5137 | 0.1054 | 4.876 | <0.001 | 19.8 | 0.3072 | 0.7202 | 1415548 |
| R4a5 | mean(experience) - mean(none) | 0.0866 | 0.1015 | 0.853 | 0.3937 | 1.3 | -0.1124 | 0.2856 | 1743484 |
| R75 | mean(2) - mean(1) | -0.6126 | 0.1947 | -3.146 | 0.0017 | 9.2 | -0.9942 | -0.2310 | 110361 |
| R75 | mean(3) - mean(1) | -0.1229 | 0.0867 | -1.418 | 0.1563 | 2.7 | -0.2929 | 0.0470 | 59998 |
| R63_6 | mean(dY/dX) | -0.0810 | 0.0369 | -2.194 | 0.0282 | 5.1 | -0.1533 | -0.0086 | 6168 |
| R80a_1 | mean(Rather yes/yes) - mean(Rather no/no) | 0.1301 | 0.0744 | 1.748 | 0.0805 | 3.6 | -0.0158 | 0.2761 | 48333 |
| R63_1 | mean(dY/dX) | -0.0126 | 0.0256 | -0.490 | 0.6242 | 0.7 | -0.0628 | 0.0377 | 21655 |
| R63_3 | mean(dY/dX) | 0.0236 | 0.0254 | 0.927 | 0.3537 | 1.5 | -0.0263 | 0.0735 | 24171 |
| R63_2 | mean(dY/dX) | 0.2960 | 0.0296 | 10.004 | <0.001 | 75.8 | 0.2380 | 0.3540 | 59712 |
| R63_4 | mean(dY/dX) | -0.0292 | 0.0263 | -1.110 | 0.2671 | 1.9 | -0.0808 | 0.0224 | 12431 |
| R97 | mean(Weiblich) - mean(Männlich) | -0.1173 | 0.0850 | -1.381 | 0.1673 | 2.6 | -0.2838 | 0.0492 | 107373 |
| R98 | mean(dY/dX) | 0.0005 | 0.0033 | 0.163 | 0.8708 | 0.2 | -0.0058 | 0.0069 | 23490 |
| R99 | mean(1) - mean(0) | 0.4242 | 0.3058 | 1.387 | 0.1654 | 2.6 | -0.1752 | 1.0235 | 386204 |
| R106 | mean(dY/dX) | 0.0432 | 0.0379 | 1.139 | 0.2545 | 2.0 | -0.0311 | 0.1176 | 106514 |
| R69 | mean(Duplexe/terraced house) - mean(Single family home) | -0.2273 | 0.0821 | -2.768 | 0.0056 | 7.5 | -0.3882 | -0.0664 | 417393 |
| R69 | mean(Apartment building) - mean(Single family home) | -0.0923 | 0.0998 | -0.925 | 0.3549 | 1.5 | -0.2878 | 0.1032 | 313148 |
| R70 | mean(dY/dX) | -0.0034 | 0.0011 | -3.074 | 0.0021 | 8.9 | -0.0056 | -0.0012 | 4505 |
| Modus | mean(1) - mean(2) | 0.3703 | 0.1033 | 3.586 | <0.001 | 11.5 | 0.1679 | 0.5727 | 1045785 |
| Modus | mean(3) - mean(2) | 0.0420 | 0.1480 | 0.284 | 0.7767 | 0.4 | -0.2481 | 0.3321 | 1032305 |
## Goodness of fit measures
# R^2
Nagelkerke_pois_all <- numeric(30)
for (i in 1:30) {
Nagelkerke_pois_all_res <- c(DescTools::PseudoR2(pois_multiimp$analyses[[i]], which = "Nagelkerke"))
Nagelkerke_pois_all[i] <- Nagelkerke_pois_all_res
}
median(Nagelkerke_pois_all)
## [1] 0.519838
# BIC
BIC_pois_all <- numeric(30)
for (i in 1:30) {
BIC_pois_all_res <- c(BIC(pois_multiimp$analyses[[i]]))
BIC_pois_all[i] <- BIC_pois_all_res
}
median(BIC_pois_all)
## [1] 5448.089
Check if assumptions are met
### Test Assumptions with residual diagnostics
sim_fmp <- simulateResiduals(pois_multiimp$analyses[[5]], nSim = 1000)
# under- and overdispersion --> not sign.
# ratio close to 1 --> Poisson model fits well to the data, no over-/underdispersion
testDispersion(sim_fmp)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.0073, p-value = 0.84
## alternative hypothesis: two.sided
# Zero inflation --> not sign.
testZeroInflation(sim_fmp)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 1.0367, p-value = 0.424
## alternative hypothesis: two.sided
# Heteroscedasticity --> not sign.
testQuantiles(sim_fmp)
##
## Test for location of quantiles via qgam
##
## data: simulationOutput
## p-value = 0.3595
## alternative hypothesis: both
# KS test for correct distribution of residuals --> not sign.
testUniformity(sim_fmp)
## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.025682, p-value = 0.2513
## alternative hypothesis: two-sided
# test if outliers are a concern, bootstrap for integer-valued distributions --> not sign.
testOutliers(sim_fmp, type = "bootstrap")
##
## DHARMa bootstrapped outlier test
##
## data: sim_fmp
## outliers at both margin(s) = 3, observations = 1571, p-value = 0.4
## alternative hypothesis: two.sided
## percent confidence interval:
## 0.001273074 0.006365372
## sample estimates:
## outlier frequency (expected: 0.00357097390197327 )
## 0.001909612
# fit poisson regression to each imputed data set
pois_multiimp_own <- with(KARE_MI,
glm(numadmeas ~ R104 + R109 + R71 + # generic capacity
R91 + R92 +
R90_1 + R90_5 +
R9 + R18 + R4a5 + R75 + # flood-specific capacity
R63_6 + R80a_1 + R63_1 +
R63_3 + R63_2 + R63_4 +
R97 + R98 + R99 + R106 + # CVs: gender, age, mig, hhsize
R69 + R70 + # CVs: house
Modus, # Survey design
family = 'poisson',
subset = (R7 == "Eigentum")))
# calculate cluster-robust AME
marginal_pois_own <- avg_slopes(pois_multiimp_own, vcov = ~town)
marginal_pois_own$order <- c(32, 33, 1, 2, 28, 3, 12, 13, 14, 15, 16, 21,
23, 22, 24, 19, 30, 29, 31, 4, 18, 20, 11,
9, 10, 6, 8, 7, 25, 26, 27)
marginal_pois_own <- marginal_pois_own %>%
mutate(across(c(3:5, 10:13), round, 4))
b_pois_AME_own <- c(NA, marginal_pois_own$estimate)
se_pois_AME_own <- c(NA, marginal_pois_own$std.error)
p_pois_AME_own <- c(NA, marginal_pois_own$p.value)
print(marginal_pois_own[order(marginal_pois_own$order, decreasing = F),], nrows = 40, style = "tinytable")
| Term | Contrast | Estimate | Std. Error | t | Pr(>|t|) | S | CI low | CI high | Df |
|---|---|---|---|---|---|---|---|---|---|
| Columns: term, contrast, estimate, std.error, s.value, predicted_lo, predicted_hi, predicted, df, statistic, p.value, conf.low, conf.high, order | |||||||||
| R104 | mean(intermediate secondary) - mean(no / lower secondary) | 0.5399 | 0.1702 | 3.1714 | 0.0015 | 9.4 | 0.2062 | 0.8736 | 18348 |
| R104 | mean(upper secondary) - mean(no / lower secondary) | 0.3601 | 0.1495 | 2.4086 | 0.0160 | 6.0 | 0.0671 | 0.6532 | 13285 |
| R109 | mean(dY/dX) | -0.0425 | 0.0209 | -2.0318 | 0.0424 | 4.6 | -0.0836 | -0.0015 | 977 |
| R71 | mean(dY/dX) | 0.0005 | 0.0006 | 0.8047 | 0.4211 | 1.2 | -0.0007 | 0.0017 | 5469 |
| R91 | mean(dY/dX) | -0.0075 | 0.0028 | -2.7287 | 0.0064 | 7.3 | -0.0129 | -0.0021 | 73527 |
| R92 | mean(medium-term) - mean(unsure/short-term) | 0.3083 | 0.2310 | 1.3348 | 0.1819 | 2.5 | -0.1444 | 0.7610 | 808334 |
| R92 | mean(long-term) - mean(unsure/short-term) | 0.0752 | 0.1703 | 0.4416 | 0.6588 | 0.6 | -0.2586 | 0.4091 | 1072685 |
| R90_1 | mean(dY/dX) | 0.1106 | 0.0541 | 2.0437 | 0.0410 | 4.6 | 0.0045 | 0.2167 | 72516 |
| R90_5 | mean(dY/dX) | 0.0854 | 0.0483 | 1.7666 | 0.0773 | 3.7 | -0.0094 | 0.1801 | 33683 |
| R9 | mean(dY/dX) | 0.1032 | 0.0383 | 2.6918 | 0.0071 | 7.1 | 0.0281 | 0.1784 | 103419 |
| R18 | mean(Eher nicht wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.6534 | 0.1424 | 4.5895 | <0.001 | 17.8 | 0.3744 | 0.9324 | 1834237 |
| R18 | mean(Eher wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.6492 | 0.1584 | 4.0987 | <0.001 | 14.6 | 0.3388 | 0.9597 | 385333 |
| R18 | mean(Sehr wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.8122 | 0.1924 | 4.2212 | <0.001 | 15.3 | 0.4351 | 1.1893 | 138061 |
| R4a5 | mean(damage) - mean(none) | 0.5221 | 0.1351 | 3.8639 | <0.001 | 13.1 | 0.2573 | 0.7870 | 1133913 |
| R4a5 | mean(experience) - mean(none) | 0.1024 | 0.1422 | 0.7204 | 0.4713 | 1.1 | -0.1763 | 0.3811 | 1849131 |
| R75 | mean(3) - mean(1) | -0.1614 | 0.1055 | -1.5296 | 0.1261 | 3.0 | -0.3681 | 0.0454 | 66469 |
| R63_6 | mean(dY/dX) | -0.0721 | 0.0470 | -1.5340 | 0.1251 | 3.0 | -0.1642 | 0.0200 | 5686 |
| R80a_1 | mean(Rather yes/yes) - mean(Rather no/no) | 0.1817 | 0.0989 | 1.8371 | 0.0662 | 3.9 | -0.0122 | 0.3756 | 46275 |
| R63_1 | mean(dY/dX) | -0.0146 | 0.0323 | -0.4523 | 0.6510 | 0.6 | -0.0780 | 0.0487 | 10058 |
| R63_3 | mean(dY/dX) | 0.0123 | 0.0364 | 0.3371 | 0.7360 | 0.4 | -0.0590 | 0.0836 | 32417 |
| R63_2 | mean(dY/dX) | 0.3451 | 0.0394 | 8.7572 | <0.001 | 58.7 | 0.2679 | 0.4224 | 48880 |
| R63_4 | mean(dY/dX) | -0.0454 | 0.0334 | -1.3603 | 0.1738 | 2.5 | -0.1108 | 0.0200 | 8672 |
| R97 | mean(Weiblich) - mean(Männlich) | -0.1406 | 0.1109 | -1.2676 | 0.2049 | 2.3 | -0.3581 | 0.0768 | 102360 |
| R98 | mean(dY/dX) | 0.0003 | 0.0043 | 0.0627 | 0.9500 | 0.1 | -0.0081 | 0.0087 | 23870 |
| R99 | mean(1) - mean(0) | 0.7144 | 0.4747 | 1.5049 | 0.1324 | 2.9 | -0.2160 | 1.6448 | 391310 |
| R106 | mean(dY/dX) | 0.0464 | 0.0512 | 0.9068 | 0.3645 | 1.5 | -0.0539 | 0.1467 | 146009 |
| R69 | mean(Duplexe/terraced house) - mean(Single family home) | -0.3139 | 0.1020 | -3.0774 | 0.0021 | 8.9 | -0.5138 | -0.1140 | 352528 |
| R69 | mean(Apartment building) - mean(Single family home) | -0.1026 | 0.1334 | -0.7686 | 0.4422 | 1.2 | -0.3641 | 0.1590 | 369244 |
| R70 | mean(dY/dX) | -0.0049 | 0.0015 | -3.3065 | 0.0010 | 10.0 | -0.0079 | -0.0020 | 4685 |
| Modus | mean(1) - mean(2) | 0.4490 | 0.1324 | 3.3925 | <0.001 | 10.5 | 0.1896 | 0.7084 | 748688 |
| Modus | mean(3) - mean(2) | 0.1280 | 0.1722 | 0.7434 | 0.4573 | 1.1 | -0.2095 | 0.4656 | 534637 |
## Goodness of fit measures
# R^2
# R^2
Nagelkerke_pois_own <- numeric(30)
for (i in 1:30) {
Nagelkerke_pois_own_res <- c(DescTools::PseudoR2(pois_multiimp_own$analyses[[i]], which = "Nagelkerke"))
Nagelkerke_pois_own[i] <- Nagelkerke_pois_own_res
}
median(Nagelkerke_pois_own)
## [1] 0.2835901
# BIC
BIC_pois_own <- numeric(30)
for (i in 1:30) {
BIC_pois_own_res <- c(BIC(pois_multiimp_own$analyses[[i]]))
BIC_pois_own[i] <- BIC_pois_own_res
}
median(BIC_pois_own)
## [1] 4508.565
Check if assumptions are met
### Test Assumptions with residual diagnostics
sim_fmp_own <- simulateResiduals(pois_multiimp_own$analyses[[16]], nSim = 1000)
# under- and overdispersion --> not sign.
# ratio close to 1 --> Poisson model fits well to the data, no over-/underdispersion
testDispersion(sim_fmp_own)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.99655, p-value = 0.984
## alternative hypothesis: two.sided
# Zero inflation --> not sign.
testZeroInflation(sim_fmp_own)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0.99941, p-value = 0.968
## alternative hypothesis: two.sided
# Heteroscedasticity --> not sign.
testQuantiles(sim_fmp_own)
##
## Test for location of quantiles via qgam
##
## data: simulationOutput
## p-value = 0.8843
## alternative hypothesis: both
# KS test for correct distribution of residuals --> not sign.
testUniformity(sim_fmp_own)
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.021295, p-value = 0.6704
## alternative hypothesis: two-sided
# test if outliers are a concern, bootstrap for integer-valued distributions --> not sign.
testOutliers(sim_fmp_own, type = "bootstrap")
##
## DHARMa bootstrapped outlier test
##
## data: sim_fmp_own
## outliers at both margin(s) = 0, observations = 1157, p-value = 0.02
## alternative hypothesis: two.sided
## percent confidence interval:
## 0.0008643042 0.0069144339
## sample estimates:
## outlier frequency (expected: 0.00389801210025929 )
## 0
# fit poisson regression to each imputed data set
pois_multiimp_ten <- with(KARE_MI,
glm(numadmeas ~ R104 + R109 + R71 + # generic capacity
R91 + R92 +
R90_1 + R90_5 +
R9 + R18 + R4a5 + R75 + # flood-specific capacity
R63_6 + R80a_1 + R63_1 +
R63_3 + R63_2 + R63_4 +
R97 + R98 + R99 + R106 + # CVs: gender, age, mig, hhsize
R69 + R70 + # CVs: house
Modus, # Survey design
family = 'poisson',
subset = (R7 == "Miete")))
# calculate cluster-robust AME
marginal_pois_ten <- avg_slopes(pois_multiimp_ten, vcov = ~town)
marginal_pois_ten$order <- c(32, 33, 1, 2, 28, 3, 12, 13, 14, 15, 16, 21,
23, 22, 24, 19, 30, 29, 31, 4, 17, 18, 20, 11,
9, 10, 6, 8, 7, 25, 26, 27)
marginal_pois_ten <- marginal_pois_ten %>%
mutate(across(c(3:5, 10:13), round, 4))
b_pois_AME_ten <- c(NA, marginal_pois_ten$estimate)
se_pois_AME_ten <- c(NA, marginal_pois_ten$std.error)
p_pois_AME_ten <- c(NA, marginal_pois_ten$p.value)
print(marginal_pois_ten[order(marginal_pois_ten$order, decreasing = F),], nrows = 40, style = "tinytable")
| Term | Contrast | Estimate | Std. Error | t | Pr(>|t|) | S | CI low | CI high | Df |
|---|---|---|---|---|---|---|---|---|---|
| Columns: term, contrast, estimate, std.error, s.value, predicted_lo, predicted_hi, predicted, df, statistic, p.value, conf.low, conf.high, order | |||||||||
| R104 | mean(intermediate secondary) - mean(no / lower secondary) | -0.1293 | 0.1782 | -0.7255 | 0.4681 | 1.1 | -0.4786 | 0.2200 | 164795 |
| R104 | mean(upper secondary) - mean(no / lower secondary) | -0.1026 | 0.1643 | -0.6247 | 0.5322 | 0.9 | -0.4246 | 0.2194 | 148508 |
| R109 | mean(dY/dX) | 0.0345 | 0.0232 | 1.4901 | 0.1363 | 2.9 | -0.0109 | 0.0800 | 3800 |
| R71 | mean(dY/dX) | -0.0001 | 0.0010 | -0.0758 | 0.9396 | 0.1 | -0.0020 | 0.0018 | 3760 |
| R91 | mean(dY/dX) | 0.0000 | 0.0023 | 0.0183 | 0.9854 | 0.0 | -0.0045 | 0.0045 | 211384 |
| R92 | mean(medium-term) - mean(unsure/short-term) | 0.0530 | 0.1022 | 0.5181 | 0.6044 | 0.7 | -0.1474 | 0.2534 | 62457 |
| R92 | mean(long-term) - mean(unsure/short-term) | 0.1209 | 0.1262 | 0.9580 | 0.3381 | 1.6 | -0.1264 | 0.3682 | 621506 |
| R90_1 | mean(dY/dX) | 0.0703 | 0.0392 | 1.7915 | 0.0732 | 3.8 | -0.0066 | 0.1472 | 92604 |
| R90_5 | mean(dY/dX) | -0.0622 | 0.0405 | -1.5343 | 0.1250 | 3.0 | -0.1416 | 0.0173 | 86698 |
| R9 | mean(dY/dX) | 0.0030 | 0.0344 | 0.0882 | 0.9297 | 0.1 | -0.0644 | 0.0704 | 65118 |
| R18 | mean(Eher nicht wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.3315 | 0.1405 | 2.3602 | 0.0183 | 5.8 | 0.0562 | 0.6068 | 200717 |
| R18 | mean(Eher wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.4358 | 0.1576 | 2.7646 | 0.0057 | 7.5 | 0.1268 | 0.7447 | 90848 |
| R18 | mean(Sehr wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.4269 | 0.1740 | 2.4537 | 0.0141 | 6.1 | 0.0859 | 0.7679 | 59217 |
| R4a5 | mean(damage) - mean(none) | 0.7121 | 0.2022 | 3.5217 | <0.001 | 11.2 | 0.3158 | 1.1085 | 246646 |
| R4a5 | mean(experience) - mean(none) | 0.0736 | 0.1218 | 0.6045 | 0.5455 | 0.9 | -0.1651 | 0.3123 | 2027457 |
| R75 | mean(2) - mean(1) | -0.1171 | 0.2434 | -0.4810 | 0.6305 | 0.7 | -0.5942 | 0.3600 | 122878 |
| R75 | mean(3) - mean(1) | 0.1130 | 0.2551 | 0.4431 | 0.6577 | 0.6 | -0.3870 | 0.6131 | 187984 |
| R63_6 | mean(dY/dX) | -0.0785 | 0.0375 | -2.0914 | 0.0365 | 4.8 | -0.1520 | -0.0049 | 8132 |
| R80a_1 | mean(Rather yes/yes) - mean(Rather no/no) | 0.0135 | 0.1061 | 0.1273 | 0.8987 | 0.2 | -0.1945 | 0.2216 | 38456 |
| R63_1 | mean(dY/dX) | 0.0106 | 0.0323 | 0.3283 | 0.7427 | 0.4 | -0.0528 | 0.0740 | 14156 |
| R63_3 | mean(dY/dX) | 0.0389 | 0.0305 | 1.2742 | 0.2026 | 2.3 | -0.0209 | 0.0988 | 22032 |
| R63_2 | mean(dY/dX) | 0.1510 | 0.0232 | 6.5071 | <0.001 | 33.6 | 0.1055 | 0.1965 | 79376 |
| R63_4 | mean(dY/dX) | -0.0280 | 0.0284 | -0.9866 | 0.3239 | 1.6 | -0.0838 | 0.0277 | 11172 |
| R97 | mean(Weiblich) - mean(Männlich) | -0.0380 | 0.0922 | -0.4123 | 0.6801 | 0.6 | -0.2187 | 0.1427 | 127027 |
| R98 | mean(dY/dX) | -0.0034 | 0.0031 | -1.0917 | 0.2750 | 1.9 | -0.0094 | 0.0027 | 53841 |
| R99 | mean(1) - mean(0) | -0.1489 | 0.1622 | -0.9182 | 0.3585 | 1.5 | -0.4668 | 0.1690 | 742190 |
| R106 | mean(dY/dX) | -0.0213 | 0.0395 | -0.5393 | 0.5897 | 0.8 | -0.0988 | 0.0562 | 7616 |
| R69 | mean(Duplexe/terraced house) - mean(Single family home) | 0.1248 | 0.1232 | 1.0128 | 0.3112 | 1.7 | -0.1167 | 0.3663 | 38855 |
| R69 | mean(Apartment building) - mean(Single family home) | 0.0426 | 0.1056 | 0.4033 | 0.6867 | 0.5 | -0.1644 | 0.2495 | 152515 |
| R70 | mean(dY/dX) | 0.0006 | 0.0014 | 0.4098 | 0.6821 | 0.6 | -0.0022 | 0.0034 | 629 |
| Modus | mean(1) - mean(2) | 0.2576 | 0.1604 | 1.6056 | 0.1084 | 3.2 | -0.0569 | 0.5720 | 345586 |
| Modus | mean(3) - mean(2) | -0.1303 | 0.1096 | -1.1889 | 0.2345 | 2.1 | -0.3452 | 0.0845 | 802859 |
## Goodness of fit measures
# R^2
# R^2
# R^2
Nagelkerke_pois_ten <- numeric(30)
for (i in 1:30) {
Nagelkerke_pois_ten_res <- c(DescTools::PseudoR2(pois_multiimp_ten$analyses[[i]], which = "Nagelkerke"))
Nagelkerke_pois_ten[i] <- Nagelkerke_pois_ten_res
}
median(Nagelkerke_pois_ten)
## [1] 0.3313186
# BIC
BIC_pois_ten <- numeric(30)
for (i in 1:30) {
BIC_pois_ten_res <- c(BIC(pois_multiimp_ten$analyses[[i]]))
BIC_pois_ten[i] <- BIC_pois_ten_res
}
median(BIC_pois_ten)
## [1] 1057.311
Check if assumptions are met
### Test Assumptions with residual diagnostics
sim_fmp_ten <- simulateResiduals(pois_multiimp_ten$analyses[[16]], nSim = 1000)
# under- and overdispersion --> not sign.
# ratio close to 1 --> Poisson model fits well to the data, no over-/underdispersion
testDispersion(sim_fmp_ten)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.87532, p-value = 0.232
## alternative hypothesis: two.sided
# Zero inflation --> not sign.
testZeroInflation(sim_fmp_ten)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0.99276, p-value = 0.832
## alternative hypothesis: two.sided
# Heteroscedasticity --> not sign.
testQuantiles(sim_fmp_ten)
##
## Test for location of quantiles via qgam
##
## data: simulationOutput
## p-value = 0.6701
## alternative hypothesis: both
# KS test for correct distribution of residuals --> not sign.
testUniformity(sim_fmp_ten)
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.03306, p-value = 0.7561
## alternative hypothesis: two-sided
# test if outliers are a concern, bootstrap for integer-valued distributions --> not sign.
testOutliers(sim_fmp_ten, type = "bootstrap")
##
## DHARMa bootstrapped outlier test
##
## data: sim_fmp_ten
## outliers at both margin(s) = 0, observations = 414, p-value = 0.66
## alternative hypothesis: two.sided
## percent confidence interval:
## 0.000000000 0.007246377
## sample estimates:
## outlier frequency (expected: 0.0027536231884058 )
## 0
Create forest plots for the property owners’ and tenants’ results
# label variables
marginal_pois_own <- marginal_pois_own %>%
mutate(label = c("Education - intermediate secondary", "Education - upper secondary", "Income",
"Living area", "Duration of residence","Place attachment - medium-term", "Place attachment - long-term",
"Social network", "Social cohesion", "Future risk perception",
"Risk perception - not likely", "Risk perception - likely", "Risk perception - very likely",
"Previous experience - damage", "Previous experience - experience",
"Main responsibility state", "Expecation in authorities", "Trust in authorities - yes",
"Public protection is sufficient", "Self-efficacy", "Motivation",
"Competing concerns",
NA, NA, NA, NA, NA, NA, NA, NA, NA ),
.after = term)
marginal_pois_ten <- marginal_pois_ten %>%
mutate(label = c("Education - intermediate secondary", "Education - upper secondary", "Income",
"Living area", "Duration of residence","Place attachment - medium-term", "Place attachment - long-term",
"Social network", "Social cohesion", "Future risk perception",
"Risk perception - not likely", "Risk perception - likely", "Risk perception - very likely",
"Previous experience - damage", "Previous experience - experience", "Main responsibility landlord",
"Main responsibility state", "Expecation in authorities", "Trust in authorities - yes",
"Public protection is sufficient", "Self-efficacy", "Motivation",
"Competing concerns",
NA, NA, NA, NA, NA, NA, NA, NA, NA ),
.after = term)
#drop control variables
marginal_pois_own2 <- marginal_pois_own %>% drop_na(label)
marginal_pois_ten2 <- marginal_pois_ten %>% drop_na(label)
# merge results owners & tenants into one dataframe
marginal_pois_own_ten <- merge(marginal_pois_ten2, marginal_pois_own2, by = "label", all = T)
marginal_pois_own_ten <- marginal_pois_own_ten[order(marginal_pois_own_ten$estimate.y, decreasing = TRUE),]
# for plotting: add value for missing landlord which is not within plotted boundaries
marginal_pois_own_ten$estimate.y[is.na(marginal_pois_own_ten$estimate.y)] <- -2
# change colors
colors_pois <- c("#588BAE", "#588BAE", "#588BAE", "black", "#588BAE",
"black", "#588BAE", "black", "#588BAE", "black",
"#588BAE", "#588BAE", "black", "black", "#588BAE",
"black", "black", "#588BAE", "black", "#588BAE",
"#588BAE", "#588BAE", "#588BAE")
colors_pois_label <- c("#588BAE", "#588BAE", "#588BAE", "#588BAE", "black",
"#588BAE", "black", "black", "#588BAE", "black",
"black", "#588BAE", "#588BAE", "black", "#588BAE",
"black", "#588BAE", "black", "#588BAE", "black",
"#588BAE", "#588BAE", "#588BAE")
lab_cap <- expression(paste("Source: own calculation, data from KARE household survey 2022 (",
n[Owners], " = 1,157; ", n[Tenants], " = 414)"))
pois_own <- ggplot(marginal_pois_own_ten, aes(y = reorder(label, estimate.y), x = estimate.y, xmin = conf.low.y, xmax = conf.high.y)) +
geom_vline(xintercept = 0, linetype="solid",
color = "#758DA3", size=0.5) +
geom_pointrange(color = colors_pois) +
labs(x ="AME", y = "",
title = "Owners", caption = " ") +
xlim(-0.6, 1.3) +
theme_minimal() +
theme(axis.text.y = element_text(colour = colors_pois_label, size = 14),
plot.title = element_text(size = 15, hjust = 0.5, face = "bold"))
pois_ten <- ggplot(marginal_pois_own_ten, aes(y = reorder(label, estimate.y), x = estimate.x, xmin = conf.low.x, xmax = conf.high.x)) +
geom_vline(xintercept = 0, linetype="solid",
color = "#758DA3", size=0.5) +
geom_pointrange(color = colors_pois) +
labs(x ="AME", y = "",
title = "Tenants", caption = lab_cap) +
xlim(-0.6, 1.3) +
theme_minimal() +
theme(axis.text.y = element_blank(),
plot.title = element_text(size = 15, hjust = 0.5, face = "bold"))
title_pois <- ggdraw() +
draw_label("Effect sizes of adaptive capacity indicators explaing the number of implemented pluvial flood adaptation measures",
fontface = 'bold', size = 16, x = 0, hjust = 0) +
theme( plot.margin = margin(0, 0, 0, 7))
plot_pois <- plot_grid(pois_own, pois_ten, rel_widths = c(2.1,0.9))
plot_grid(title_pois, plot_pois, ncol = 1, rel_heights = c(0.1, 1))
We estimated additional models to test the robustness of the income effect. Robustness checks for the income effect were necessary, as household net income was originally collected as binned data, but bin midpoints were used in the models to approximate income. Research has proven that this method works well for mid-income classes (Stauder & Hüning 2004); however, there are deviations in the tails due to small numbers of observations and broader bins. Changes:
log(R109) to compress the range of higher incomes and make the distribution more symmetric.
account for differences between income groups:
low < 1300 € equalised disposable net income,
middle between 1300 € and 4000 €, and
high > 4000€
# load imputed dataset
load(here::here("./data/tidy/KARE_imp_richlog.RData"))
KARE_MI <- KARE_imp
### MID (multiple imputation, then deletion):
# use Y to impute X, but drop obs with missing Y from the analysis (von Hippel 2007)
# create a long format dataset
KARE_MI_long <- complete(KARE_MI, action = "long", include = TRUE)
# drop obs with NA on Y
KARE_MI_long <- subset(KARE_MI_long, !(IDS %in% KARE_MI_long$IDS[is.na(KARE_MI_long$numadmeas)]))
# recode R75 from imputed R75a (tenants - polyreg) and R75b (owners - logreg)
KARE_MI_long$R75 <- ifelse(KARE_MI_long$R7 == "Miete", KARE_MI_long$R75a,
ifelse(KARE_MI_long$R75b == "Öffentliche Stellen (d.h. Bund, Länder oder Gemeinden)", 3, 1))
KARE_MI_long$R75 <- as_factor(KARE_MI_long$R75)
KARE_MI <- mice::as.mids(KARE_MI_long)
# n = 1,571 obs
### turn ordered factors into unordered to get pairwise comparisons
# instead of polynomial contrasts
KARE_MI$data$R18 <- factor(KARE_MI$data$R18,
ordered = F)
KARE_MI$data$R80a_1 <- factor(KARE_MI$data$R80a_1,
ordered = F)
KARE_MI$data$R92 <- factor(KARE_MI$data$R92,
ordered = F)
KARE_MI$data$R104 <- factor(KARE_MI$data$R104,
ordered = F)
KARE_MI$data$Modus <- factor(KARE_MI$data$Modus,
ordered = F)
### change reference levels
KARE_MI$data$Modus <- relevel(KARE_MI$data$Modus, ref = "2")
KARE_MI$data$R92 <- relevel(KARE_MI$data$R92, ref = "unsure/short-term")
KARE_MI$data$R18 <- relevel(KARE_MI$data$R18, ref = "Überhaupt nicht wahrscheinlich")
KARE_MI$data$R4a5 <- relevel(KARE_MI$data$R4a5, ref = "none")
KARE_MI$data$R7 <- relevel(KARE_MI$data$R7, ref = "Miete")
KARE_MI$data$R80a_1 <- relevel(KARE_MI$data$R80a_1, ref = "Rather no/no")
KARE_MI$data$rich <- relevel(KARE_MI$data$rich, ref = "middle")
After preparing the data, run the logistic regression models with cluster-robust standard errors for 1) the full sample
# fit logistic regression to each imputed data set
glm_multiimp <- with(KARE_MI,
glm(admeas ~ R104 + logR109 + rich + R71 + # generic capacity
R7 + R91 + R92 +
R90_1 + R90_5 +
R9 + R18 + R4a5 + R75 + # flood-specific capacity
R63_6 + R80a_1 + R63_1 +
R63_3 + R63_2 + R63_4 +
R97 + R98 + R99 + R106 + # CVs: gender, age, mig, hhsize
R69 + R70 + # CVs: house
Modus, # Survey design
family = binomial("logit")))
# calculate AME
marginal <- avg_slopes(glm_multiimp, vcov = ~town)
marginal$order <- c(34, 35, 1, 2, 30, 14, 15, 16, 17, 18, 23,
25, 24, 26, 21, 32, 31, 7, 33, 6, 19, 20, 22, 13,
11, 12, 8, 10, 9, 27, 28, 29, 3, 4, 5)
marginal <- marginal %>%
mutate(across(c(3:5, 10:13), round, 4))
print(marginal[order(marginal$order, decreasing = F),], nrows = 40, style = "tinytable")
| Term | Contrast | Estimate | Std. Error | t | Pr(>|t|) | S | CI low | CI high | Df |
|---|---|---|---|---|---|---|---|---|---|
| Columns: term, contrast, estimate, std.error, s.value, predicted_lo, predicted_hi, predicted, df, statistic, p.value, conf.low, conf.high, order | |||||||||
| R104 | mean(intermediate secondary) - mean(no / lower secondary) | -0.0230 | 0.0287 | -0.799 | 0.4242 | 1.2 | -0.0792 | 0.0333 | 14732 |
| R104 | mean(upper secondary) - mean(no / lower secondary) | -0.0167 | 0.0308 | -0.543 | 0.5874 | 0.8 | -0.0772 | 0.0437 | 12069 |
| logR109 | mean(dY/dX) | -0.0056 | 0.0313 | -0.180 | 0.8571 | 0.2 | -0.0670 | 0.0557 | 2648 |
| rich | mean(poor) - mean(middle) | -0.0405 | 0.0412 | -0.981 | 0.3268 | 1.6 | -0.1214 | 0.0405 | 1165 |
| rich | mean(rich) - mean(middle) | 0.0248 | 0.0359 | 0.690 | 0.4903 | 1.0 | -0.0456 | 0.0952 | 2913 |
| R71 | mean(dY/dX) | 0.0001 | 0.0002 | 0.332 | 0.7402 | 0.4 | -0.0003 | 0.0004 | 4304 |
| R7 | mean(Eigentum) - mean(Miete) | 0.2434 | 0.0466 | 5.226 | <0.001 | 22.5 | 0.1521 | 0.3347 | 64659 |
| R91 | mean(dY/dX) | -0.0002 | 0.0006 | -0.367 | 0.7135 | 0.5 | -0.0014 | 0.0009 | 99372 |
| R92 | mean(medium-term) - mean(unsure/short-term) | 0.0053 | 0.0260 | 0.205 | 0.8375 | 0.3 | -0.0457 | 0.0563 | 63502 |
| R92 | mean(long-term) - mean(unsure/short-term) | 0.0103 | 0.0264 | 0.392 | 0.6952 | 0.5 | -0.0414 | 0.0621 | 450208 |
| R90_1 | mean(dY/dX) | 0.0191 | 0.0079 | 2.408 | 0.0161 | 6.0 | 0.0036 | 0.0347 | 31063 |
| R90_5 | mean(dY/dX) | -0.0145 | 0.0081 | -1.783 | 0.0746 | 3.7 | -0.0305 | 0.0014 | 54654 |
| R9 | mean(dY/dX) | 0.0057 | 0.0065 | 0.874 | 0.3822 | 1.4 | -0.0071 | 0.0185 | 218393 |
| R18 | mean(Eher nicht wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.0874 | 0.0314 | 2.786 | 0.0053 | 7.6 | 0.0259 | 0.1489 | 219316 |
| R18 | mean(Eher wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.1042 | 0.0428 | 2.436 | 0.0149 | 6.1 | 0.0204 | 0.1881 | 181728 |
| R18 | mean(Sehr wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.1709 | 0.0431 | 3.963 | <0.001 | 13.7 | 0.0864 | 0.2554 | 41913 |
| R4a5 | mean(damage) - mean(none) | 0.1161 | 0.0230 | 5.049 | <0.001 | 21.1 | 0.0711 | 0.1612 | 1194555 |
| R4a5 | mean(experience) - mean(none) | -0.0085 | 0.0267 | -0.320 | 0.7490 | 0.4 | -0.0609 | 0.0438 | 8389552 |
| R75 | mean(2) - mean(1) | -0.0407 | 0.0411 | -0.991 | 0.3218 | 1.6 | -0.1212 | 0.0398 | 66761 |
| R75 | mean(3) - mean(1) | 0.0056 | 0.0231 | 0.243 | 0.8083 | 0.3 | -0.0397 | 0.0509 | 29066 |
| R63_6 | mean(dY/dX) | -0.0097 | 0.0072 | -1.349 | 0.1776 | 2.5 | -0.0238 | 0.0044 | 3326 |
| R80a_1 | mean(Rather yes/yes) - mean(Rather no/no) | 0.0027 | 0.0139 | 0.194 | 0.8461 | 0.2 | -0.0246 | 0.0300 | 4470 |
| R63_1 | mean(dY/dX) | -0.0021 | 0.0065 | -0.320 | 0.7489 | 0.4 | -0.0148 | 0.0106 | 7372 |
| R63_3 | mean(dY/dX) | 0.0112 | 0.0053 | 2.117 | 0.0343 | 4.9 | 0.0008 | 0.0215 | 4865 |
| R63_2 | mean(dY/dX) | 0.0379 | 0.0067 | 5.639 | <0.001 | 25.7 | 0.0247 | 0.0511 | 4418 |
| R63_4 | mean(dY/dX) | -0.0035 | 0.0060 | -0.585 | 0.5589 | 0.8 | -0.0152 | 0.0082 | 3614 |
| R97 | mean(Weiblich) - mean(Männlich) | 0.0033 | 0.0209 | 0.158 | 0.8744 | 0.2 | -0.0377 | 0.0443 | 155594 |
| R98 | mean(dY/dX) | 0.0001 | 0.0008 | 0.194 | 0.8460 | 0.2 | -0.0013 | 0.0016 | 106635 |
| R99 | mean(1) - mean(0) | 0.0361 | 0.0513 | 0.703 | 0.4821 | 1.1 | -0.0645 | 0.1367 | 757923 |
| R106 | mean(dY/dX) | 0.0124 | 0.0117 | 1.061 | 0.2887 | 1.8 | -0.0105 | 0.0354 | 6554 |
| R69 | mean(Duplexe/terraced house) - mean(Single family home) | -0.0236 | 0.0230 | -1.023 | 0.3061 | 1.7 | -0.0688 | 0.0216 | 110336 |
| R69 | mean(Apartment building) - mean(Single family home) | -0.0385 | 0.0237 | -1.626 | 0.1040 | 3.3 | -0.0850 | 0.0079 | 263741 |
| R70 | mean(dY/dX) | 0.0001 | 0.0003 | 0.295 | 0.7677 | 0.4 | -0.0005 | 0.0007 | 2192 |
| Modus | mean(1) - mean(2) | 0.0474 | 0.0197 | 2.406 | 0.0161 | 6.0 | 0.0088 | 0.0860 | 262433 |
| Modus | mean(3) - mean(2) | -0.0314 | 0.0339 | -0.926 | 0.3544 | 1.5 | -0.0977 | 0.0350 | 767463 |
### Goodness of fit measures
#R^2
Nagelkerke_log_all <- numeric(30)
for (i in 1:30) {
Nagelkerke_log_all_res <- c(DescTools::PseudoR2(glm_multiimp$analyses[[i]], which = "Nagelkerke"))
Nagelkerke_log_all[i] <- Nagelkerke_log_all_res
}
median(Nagelkerke_log_all)
## [1] 0.4194191
# BIC
BIC_log_all <- numeric(30)
for (i in 1:30) {
BIC_log_all_res <- c(BIC(glm_multiimp$analyses[[i]]))
BIC_log_all[i] <- BIC_log_all_res
}
median(BIC_log_all)
## [1] 1369.911
We checked the four logistic regression assumptions: 1) linearity, 2) independence, 3) no multicollinearity, and 4) exogeneity. Tests revealed no problems.
# 1) Linearity
plot(glm_multiimp$analyses[[7]], 1)
# not useful to plot the raw residuals, examine binned residual plots
arm::binnedplot(fitted(glm_multiimp$analyses[[7]]),
residuals(glm_multiimp$analyses[[7]], type = "response"),
nclass = NULL,
xlab = "Expected Values",
ylab = "Average residual",
main = "Binned residual plot",
cex.pts = 0.8,
col.pts = 1,
col.int = "gray")
KARE_data6 <- complete(KARE_MI, action=6)
arm::binnedplot(KARE_data6$logR109,
residuals(glm_multiimp$analyses[[6]], type = "response"),
nclass = NULL,
xlab = "Expected Values",
ylab = "Average residual",
main = "Binned residual plot",
cex.pts = 0.8,
col.pts = 1,
col.int = "gray")
# 2) Independence
# --> Respondents from the same town might be more similar to one another on the
# outcome measure, on average, than they are with respondents across towns
# plot residuals vs spatial variable
glm_data6 <- glm(data = KARE_data6,
admeas ~ R104 + logR109 + rich + R71 + # generic capacity
R7 + R91 + R92 +
R90_1 + R90_5 +
R9 + R18 + R4a5 + R75 + # flood-specific capacity
R63_6 + R80a_1 + R63_1 +
R63_3 + R63_2 + R63_4 +
R97 + R98 + R99 + R106 + # CVs: gender, age, mig, hhsize
R69 + R70 + # CVs: house
Modus, # Survey design
family = binomial("logit"))
ggplot(data = data.frame(x = KARE_data6$town, y = glm_data6$residuals), aes(x = x, y = y)) +
geom_boxplot() +
coord_cartesian(ylim = c(-20, 20))
# some patterns --> use cluster-robust SEs
# maybe also due to small cluster sizes (e.g. 2, 5, 10 respondents)
# 3) No multicollinearity
car::vif(glm_multiimp$analyses[[10]])
## GVIF Df GVIF^(1/(2*Df))
## R104 1.296 2 1.067
## logR109 4.233 1 2.057
## rich 3.429 2 1.361
## R71 1.552 1 1.246
## R7 3.522 1 1.877
## R91 1.581 1 1.257
## R92 1.384 2 1.085
## R90_1 1.572 1 1.254
## R90_5 1.659 1 1.288
## R9 1.180 1 1.086
## R18 1.628 3 1.085
## R4a5 1.226 2 1.052
## R75 3.241 2 1.342
## R63_6 1.207 1 1.099
## R80a_1 1.119 1 1.058
## R63_1 1.228 1 1.108
## R63_3 1.221 1 1.105
## R63_2 1.225 1 1.107
## R63_4 1.141 1 1.068
## R97 1.144 1 1.070
## R98 1.678 1 1.295
## R99 1.052 1 1.026
## R106 2.337 1 1.529
## R69 1.423 2 1.092
## R70 1.110 1 1.053
## Modus 1.522 2 1.111
# GVIF to the power of 1/2df makes the value of the GVIF comparable across
# different number of parameters -->
# GVIF (1/(2*Df) < 2.1 for all --> no multicollinearity
# 4) Exogeneity
# --> all relevant CVs are included in the model
# fit logistic regression to each imputed data set
glm_multiimp_own <- with(KARE_MI,
glm(admeas ~ R104 + logR109 + rich + R71 + # generic capacity
R91 + R92 +
R90_1 + R90_5 +
R9 + R18 + R4a5 + R75 + # flood-specific capacity
R63_6 + R80a_1 + R63_1 +
R63_3 + R63_2 + R63_4 +
R97 + R98 + R99 + R106 + # CVs: gender, age, mig, hhsize
R69 + R70 + # CVs: house
Modus, # Survey design
family = binomial("logit"),
subset = (R7 == "Eigentum")))
# calculate AME
marginal_own <- avg_slopes(glm_multiimp_own, vcov = ~town)
marginal_own$order <- c(34, 35, 1, 2, 30, 14, 15, 16, 17, 18, 23,
25, 24, 26, 21, 32, 31, 33, 6, 20, 22, 13,
11, 12, 8, 10, 9, 27, 28, 29, 3, 4, 5)
marginal_own <- marginal_own %>%
mutate(across(c(3:5, 10:13), round, 4))
print(marginal_own[order(marginal_own$order, decreasing = F),], nrows = 40, style = "tinytable")
| Term | Contrast | Estimate | Std. Error | t | Pr(>|t|) | S | CI low | CI high | Df |
|---|---|---|---|---|---|---|---|---|---|
| Columns: term, contrast, estimate, std.error, s.value, predicted_lo, predicted_hi, predicted, df, statistic, p.value, conf.low, conf.high, order | |||||||||
| R104 | mean(intermediate secondary) - mean(no / lower secondary) | -0.0017 | 0.0295 | -0.0574 | 0.9542 | 0.1 | -0.0595 | 0.0561 | 14996 |
| R104 | mean(upper secondary) - mean(no / lower secondary) | 0.0030 | 0.0308 | 0.0963 | 0.9233 | 0.1 | -0.0573 | 0.0633 | 12325 |
| logR109 | mean(dY/dX) | 0.0112 | 0.0310 | 0.3609 | 0.7182 | 0.5 | -0.0496 | 0.0720 | 2982 |
| rich | mean(poor) - mean(middle) | -0.0214 | 0.0460 | -0.4655 | 0.6417 | 0.6 | -0.1116 | 0.0688 | 935 |
| rich | mean(rich) - mean(middle) | 0.0183 | 0.0370 | 0.4945 | 0.6210 | 0.7 | -0.0543 | 0.0910 | 2448 |
| R71 | mean(dY/dX) | 0.0001 | 0.0002 | 0.4583 | 0.6468 | 0.6 | -0.0002 | 0.0004 | 4697 |
| R91 | mean(dY/dX) | -0.0001 | 0.0006 | -0.2269 | 0.8205 | 0.3 | -0.0013 | 0.0010 | 154864 |
| R92 | mean(medium-term) - mean(unsure/short-term) | 0.0277 | 0.0370 | 0.7492 | 0.4537 | 1.1 | -0.0447 | 0.1001 | 1186943 |
| R92 | mean(long-term) - mean(unsure/short-term) | 0.0009 | 0.0246 | 0.0351 | 0.9720 | 0.0 | -0.0474 | 0.0491 | 388426 |
| R90_1 | mean(dY/dX) | 0.0153 | 0.0066 | 2.3087 | 0.0210 | 5.6 | 0.0023 | 0.0283 | 21006 |
| R90_5 | mean(dY/dX) | -0.0026 | 0.0075 | -0.3432 | 0.7315 | 0.5 | -0.0173 | 0.0121 | 80378 |
| R9 | mean(dY/dX) | 0.0042 | 0.0064 | 0.6572 | 0.5110 | 1.0 | -0.0083 | 0.0167 | 202548 |
| R18 | mean(Eher nicht wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.0604 | 0.0288 | 2.0975 | 0.0360 | 4.8 | 0.0040 | 0.1169 | 170367 |
| R18 | mean(Eher wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.0405 | 0.0363 | 1.1168 | 0.2641 | 1.9 | -0.0306 | 0.1116 | 55600 |
| R18 | mean(Sehr wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.1149 | 0.0379 | 3.0329 | 0.0024 | 8.7 | 0.0406 | 0.1891 | 9367 |
| R4a5 | mean(damage) - mean(none) | 0.0557 | 0.0221 | 2.5209 | 0.0117 | 6.4 | 0.0124 | 0.0990 | 1802590 |
| R4a5 | mean(experience) - mean(none) | 0.0154 | 0.0225 | 0.6849 | 0.4934 | 1.0 | -0.0287 | 0.0596 | 3559625 |
| R75 | mean(3) - mean(1) | -0.0054 | 0.0190 | -0.2852 | 0.7755 | 0.4 | -0.0427 | 0.0318 | 22353 |
| R63_6 | mean(dY/dX) | -0.0044 | 0.0074 | -0.5893 | 0.5557 | 0.8 | -0.0189 | 0.0101 | 2063 |
| R80a_1 | mean(Rather yes/yes) - mean(Rather no/no) | 0.0016 | 0.0158 | 0.0986 | 0.9215 | 0.1 | -0.0295 | 0.0326 | 16702 |
| R63_1 | mean(dY/dX) | -0.0049 | 0.0068 | -0.7297 | 0.4656 | 1.1 | -0.0182 | 0.0083 | 6215 |
| R63_3 | mean(dY/dX) | 0.0040 | 0.0063 | 0.6271 | 0.5306 | 0.9 | -0.0084 | 0.0164 | 3509 |
| R63_2 | mean(dY/dX) | 0.0266 | 0.0080 | 3.3223 | <0.001 | 10.1 | 0.0109 | 0.0423 | 4929 |
| R63_4 | mean(dY/dX) | -0.0038 | 0.0060 | -0.6406 | 0.5219 | 0.9 | -0.0156 | 0.0079 | 1043 |
| R97 | mean(Weiblich) - mean(Männlich) | 0.0153 | 0.0207 | 0.7407 | 0.4589 | 1.1 | -0.0252 | 0.0558 | 89864 |
| R98 | mean(dY/dX) | 0.0010 | 0.0007 | 1.4063 | 0.1596 | 2.6 | -0.0004 | 0.0024 | 47057 |
| R99 | mean(1) - mean(0) | 0.0367 | 0.0553 | 0.6635 | 0.5070 | 1.0 | -0.0717 | 0.1451 | 867745 |
| R106 | mean(dY/dX) | 0.0123 | 0.0133 | 0.9217 | 0.3567 | 1.5 | -0.0138 | 0.0384 | 6856 |
| R69 | mean(Duplexe/terraced house) - mean(Single family home) | -0.0196 | 0.0206 | -0.9531 | 0.3405 | 1.6 | -0.0599 | 0.0207 | 101697 |
| R69 | mean(Apartment building) - mean(Single family home) | -0.0206 | 0.0205 | -1.0031 | 0.3158 | 1.7 | -0.0609 | 0.0197 | 201643 |
| R70 | mean(dY/dX) | -0.0003 | 0.0003 | -0.9732 | 0.3305 | 1.6 | -0.0008 | 0.0003 | 4522 |
| Modus | mean(1) - mean(2) | 0.0365 | 0.0192 | 1.9062 | 0.0566 | 4.1 | -0.0010 | 0.0741 | 146704 |
| Modus | mean(3) - mean(2) | 0.0049 | 0.0365 | 0.1355 | 0.8922 | 0.2 | -0.0666 | 0.0765 | 324943 |
### Goodness of fit measures
#R^2
Nagelkerke_log_own <- numeric(30)
for (i in 1:30) {
Nagelkerke_log_own_res <- c(DescTools::PseudoR2(glm_multiimp_own$analyses[[i]], which = "Nagelkerke"))
Nagelkerke_log_own[i] <- Nagelkerke_log_own_res
}
median(Nagelkerke_log_own)
## [1] 0.1686942
# BIC
BIC_log_own <- numeric(30)
for (i in 1:30) {
BIC_log_own_res <- c(BIC(glm_multiimp_own$analyses[[i]]))
BIC_log_own[i] <- BIC_log_own_res
}
median(BIC_log_own)
## [1] 863.7056
# fit logistic regression to each imputed data set
glm_multiimp_ten <- with(KARE_MI,
glm(admeas ~ R104 + logR109 + rich + R71 + # generic capacity
R91 + R92 +
R90_1 + R90_5 +
R9 + R18 + R4a5 + R75 + # flood-specific capacity
R63_6 + R80a_1 + R63_1 +
R63_3 + R63_2 + R63_4 +
R97 + R98 + R99 + R106 + # CVs: gender, age, mig, hhsize
R69 + R70 + # CVs: house
Modus, # Survey design
family = binomial("logit"),
subset = (R7 == "Miete")))
# calculate AME
marginal_ten <- avg_slopes(glm_multiimp_ten, vcov = ~town)
marginal_ten$order <- c(34, 35, 1, 2, 30, 14, 15, 16, 17, 18, 23,
25, 24, 26, 21, 32, 31, 33, 6, 19, 20, 22, 13,
11, 12, 8, 10, 9, 27, 28, 29, 3, 4, 5)
marginal_ten <- marginal_ten %>%
mutate(across(c(3:5, 10:13), round, 4))
print(marginal_ten[order(marginal_ten$order, decreasing = F),], nrows = 40, style = "tinytable")
| Term | Contrast | Estimate | Std. Error | t | Pr(>|t|) | S | CI low | CI high | Df |
|---|---|---|---|---|---|---|---|---|---|
| Columns: term, contrast, estimate, std.error, s.value, predicted_lo, predicted_hi, predicted, df, statistic, p.value, conf.low, conf.high, order | |||||||||
| R104 | mean(intermediate secondary) - mean(no / lower secondary) | -0.0919 | 0.0874 | -1.051 | 0.2933 | 1.8 | -0.2632 | 0.0795 | 32362 |
| R104 | mean(upper secondary) - mean(no / lower secondary) | -0.0898 | 0.0839 | -1.071 | 0.2843 | 1.8 | -0.2542 | 0.0746 | 26415 |
| logR109 | mean(dY/dX) | -0.0536 | 0.0752 | -0.712 | 0.4765 | 1.1 | -0.2011 | 0.0939 | 3019 |
| rich | mean(poor) - mean(middle) | -0.0772 | 0.0710 | -1.088 | 0.2769 | 1.9 | -0.2165 | 0.0621 | 1167 |
| rich | mean(rich) - mean(middle) | 0.0678 | 0.0905 | 0.750 | 0.4534 | 1.1 | -0.1096 | 0.2452 | 2695 |
| R71 | mean(dY/dX) | -0.0002 | 0.0008 | -0.248 | 0.8042 | 0.3 | -0.0017 | 0.0014 | 6222 |
| R91 | mean(dY/dX) | -0.0009 | 0.0013 | -0.701 | 0.4833 | 1.0 | -0.0035 | 0.0016 | 19738 |
| R92 | mean(medium-term) - mean(unsure/short-term) | -0.0172 | 0.0585 | -0.293 | 0.7692 | 0.4 | -0.1317 | 0.0974 | 17934 |
| R92 | mean(long-term) - mean(unsure/short-term) | 0.0372 | 0.0601 | 0.619 | 0.5361 | 0.9 | -0.0806 | 0.1550 | 126262 |
| R90_1 | mean(dY/dX) | 0.0361 | 0.0186 | 1.943 | 0.0521 | 4.3 | -0.0003 | 0.0724 | 18218 |
| R90_5 | mean(dY/dX) | -0.0446 | 0.0235 | -1.894 | 0.0583 | 4.1 | -0.0907 | 0.0016 | 42849 |
| R9 | mean(dY/dX) | 0.0123 | 0.0146 | 0.844 | 0.3984 | 1.3 | -0.0163 | 0.0409 | 65883 |
| R18 | mean(Eher nicht wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.1339 | 0.0789 | 1.697 | 0.0896 | 3.5 | -0.0207 | 0.2885 | 206313 |
| R18 | mean(Eher wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.2351 | 0.0941 | 2.498 | 0.0125 | 6.3 | 0.0506 | 0.4195 | 210619 |
| R18 | mean(Sehr wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.3563 | 0.1159 | 3.074 | 0.0021 | 8.9 | 0.1291 | 0.5834 | 94778 |
| R4a5 | mean(damage) - mean(none) | 0.3459 | 0.0632 | 5.469 | <0.001 | 24.4 | 0.2219 | 0.4698 | 199300 |
| R4a5 | mean(experience) - mean(none) | -0.0745 | 0.0649 | -1.149 | 0.2506 | 2.0 | -0.2017 | 0.0526 | 325028 |
| R75 | mean(2) - mean(1) | 0.0893 | 0.0990 | 0.902 | 0.3670 | 1.4 | -0.1047 | 0.2834 | 49630 |
| R75 | mean(3) - mean(1) | 0.2046 | 0.1048 | 1.953 | 0.0508 | 4.3 | -0.0007 | 0.4099 | 79949 |
| R63_6 | mean(dY/dX) | -0.0187 | 0.0168 | -1.116 | 0.2646 | 1.9 | -0.0517 | 0.0142 | 9092 |
| R80a_1 | mean(Rather yes/yes) - mean(Rather no/no) | 0.0109 | 0.0389 | 0.280 | 0.7794 | 0.4 | -0.0654 | 0.0873 | 11063 |
| R63_1 | mean(dY/dX) | 0.0112 | 0.0160 | 0.701 | 0.4837 | 1.0 | -0.0202 | 0.0426 | 6824 |
| R63_3 | mean(dY/dX) | 0.0325 | 0.0141 | 2.306 | 0.0211 | 5.6 | 0.0049 | 0.0601 | 18588 |
| R63_2 | mean(dY/dX) | 0.0675 | 0.0124 | 5.441 | <0.001 | 24.1 | 0.0432 | 0.0918 | 8004 |
| R63_4 | mean(dY/dX) | -0.0035 | 0.0144 | -0.247 | 0.8051 | 0.3 | -0.0317 | 0.0246 | 8782 |
| R97 | mean(Weiblich) - mean(Männlich) | -0.0220 | 0.0468 | -0.471 | 0.6378 | 0.6 | -0.1138 | 0.0697 | 52536 |
| R98 | mean(dY/dX) | -0.0017 | 0.0016 | -1.108 | 0.2679 | 1.9 | -0.0048 | 0.0013 | 17586 |
| R99 | mean(1) - mean(0) | 0.0403 | 0.1250 | 0.323 | 0.7469 | 0.4 | -0.2046 | 0.2853 | 579400 |
| R106 | mean(dY/dX) | 0.0155 | 0.0249 | 0.622 | 0.5342 | 0.9 | -0.0334 | 0.0644 | 3820 |
| R69 | mean(Duplexe/terraced house) - mean(Single family home) | -0.0108 | 0.0893 | -0.121 | 0.9034 | 0.1 | -0.1858 | 0.1642 | 82066 |
| R69 | mean(Apartment building) - mean(Single family home) | -0.0849 | 0.0805 | -1.055 | 0.2914 | 1.8 | -0.2426 | 0.0728 | 150400 |
| R70 | mean(dY/dX) | 0.0009 | 0.0008 | 1.163 | 0.2455 | 2.0 | -0.0006 | 0.0025 | 431 |
| Modus | mean(1) - mean(2) | 0.0919 | 0.0619 | 1.485 | 0.1375 | 2.9 | -0.0294 | 0.2131 | 153845 |
| Modus | mean(3) - mean(2) | -0.1148 | 0.0584 | -1.966 | 0.0493 | 4.3 | -0.2293 | -0.0004 | 105677 |
### Goodness of fit measures
#R^2
Nagelkerke_log_ten <- numeric(30)
for (i in 1:30) {
Nagelkerke_log_ten_res <- c(DescTools::PseudoR2(glm_multiimp_ten$analyses[[i]], which = "Nagelkerke"))
Nagelkerke_log_ten[i] <- Nagelkerke_log_ten_res
}
median(Nagelkerke_log_ten)
## [1] 0.3448924
# BIC
BIC_log_ten <- numeric(30)
for (i in 1:30) {
BIC_log_ten_res <- c(BIC(glm_multiimp_ten$analyses[[i]]))
BIC_log_ten[i] <- BIC_log_ten_res
}
median(BIC_log_ten)
## [1] 660.4991
Poisson regression models with cluster-robust standard errors for 1) the full sample
# fit logistic regression to each imputed data set
pois_multiimp <- with(KARE_MI,
glm(numadmeas ~ R104 + logR109 + rich + R71 + # generic capacity
R7 + R91 + R92 +
R90_1 + R90_5 +
R9 + R18 + R4a5 + R75 + # flood-specific capacity
R63_6 + R80a_1 + R63_1 +
R63_3 + R63_2 + R63_4 +
R97 + R98 + R99 + R106 + # CVs: gender, age, mig, hhsize
R69 + R70 + # CVs: house
Modus, # Survey design
family = 'poisson'))
# calculate AME
marginal_pois <- avg_slopes(pois_multiimp, vcov = ~town)
marginal_pois$order <- c(34, 35, 1, 2, 30,14, 15, 16, 17, 18, 23,
25, 24, 26, 21, 32, 31, 7, 33, 6, 19, 20, 22, 13,
11, 12, 8, 10, 9, 27, 28, 29, 3, 4, 5)
marginal_pois <- marginal_pois %>%
mutate(across(c(3:5, 10:13), round, 4))
print(marginal_pois[order(marginal_pois$order, decreasing = F),], nrows = 40, style = "tinytable")
| Term | Contrast | Estimate | Std. Error | t | Pr(>|t|) | S | CI low | CI high | Df |
|---|---|---|---|---|---|---|---|---|---|
| Columns: term, contrast, estimate, std.error, s.value, predicted_lo, predicted_hi, predicted, df, statistic, p.value, conf.low, conf.high, order | |||||||||
| R104 | mean(intermediate secondary) - mean(no / lower secondary) | 0.3916 | 0.1289 | 3.037 | 0.0024 | 8.7 | 0.1389 | 0.6443 | 32418 |
| R104 | mean(upper secondary) - mean(no / lower secondary) | 0.2788 | 0.1191 | 2.342 | 0.0192 | 5.7 | 0.0454 | 0.5122 | 14501 |
| logR109 | mean(dY/dX) | -0.0991 | 0.1297 | -0.764 | 0.4447 | 1.2 | -0.3536 | 0.1553 | 968 |
| rich | mean(poor) - mean(middle) | -0.0884 | 0.1725 | -0.512 | 0.6086 | 0.7 | -0.4267 | 0.2499 | 2126 |
| rich | mean(rich) - mean(middle) | -0.2045 | 0.1464 | -1.397 | 0.1628 | 2.6 | -0.4919 | 0.0828 | 947 |
| R71 | mean(dY/dX) | 0.0005 | 0.0005 | 0.993 | 0.3209 | 1.6 | -0.0005 | 0.0015 | 2800 |
| R7 | mean(Eigentum) - mean(Miete) | 1.4550 | 0.1324 | 10.987 | <0.001 | 90.8 | 1.1954 | 1.7145 | 181309 |
| R91 | mean(dY/dX) | -0.0059 | 0.0022 | -2.671 | 0.0076 | 7.0 | -0.0102 | -0.0016 | 131133 |
| R92 | mean(medium-term) - mean(unsure/short-term) | 0.1708 | 0.1645 | 1.038 | 0.2992 | 1.7 | -0.1516 | 0.4932 | 550343 |
| R92 | mean(long-term) - mean(unsure/short-term) | 0.0413 | 0.1340 | 0.308 | 0.7582 | 0.4 | -0.2214 | 0.3039 | 513842 |
| R90_1 | mean(dY/dX) | 0.1031 | 0.0446 | 2.313 | 0.0207 | 5.6 | 0.0157 | 0.1905 | 34604 |
| R90_5 | mean(dY/dX) | 0.0467 | 0.0362 | 1.290 | 0.1970 | 2.3 | -0.0242 | 0.1176 | 18090 |
| R9 | mean(dY/dX) | 0.0791 | 0.0290 | 2.728 | 0.0064 | 7.3 | 0.0223 | 0.1359 | 43717 |
| R18 | mean(Eher nicht wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.5840 | 0.1124 | 5.197 | <0.001 | 22.2 | 0.3638 | 0.8043 | 1571075 |
| R18 | mean(Eher wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.6160 | 0.1305 | 4.720 | <0.001 | 18.7 | 0.3602 | 0.8717 | 228904 |
| R18 | mean(Sehr wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.6982 | 0.1566 | 4.458 | <0.001 | 16.9 | 0.3912 | 1.0052 | 63606 |
| R4a5 | mean(damage) - mean(none) | 0.5234 | 0.1058 | 4.948 | <0.001 | 20.3 | 0.3161 | 0.7307 | 2338226 |
| R4a5 | mean(experience) - mean(none) | 0.0863 | 0.1023 | 0.844 | 0.3984 | 1.3 | -0.1141 | 0.2868 | 703492 |
| R75 | mean(2) - mean(1) | -0.6172 | 0.1957 | -3.154 | 0.0016 | 9.3 | -1.0008 | -0.2337 | 85518 |
| R75 | mean(3) - mean(1) | -0.1376 | 0.0862 | -1.596 | 0.1105 | 3.2 | -0.3065 | 0.0314 | 75094 |
| R63_6 | mean(dY/dX) | -0.0810 | 0.0363 | -2.232 | 0.0257 | 5.3 | -0.1521 | -0.0098 | 5682 |
| R80a_1 | mean(Rather yes/yes) - mean(Rather no/no) | 0.1295 | 0.0741 | 1.747 | 0.0806 | 3.6 | -0.0158 | 0.2747 | 48430 |
| R63_1 | mean(dY/dX) | -0.0154 | 0.0256 | -0.600 | 0.5485 | 0.9 | -0.0656 | 0.0348 | 6979 |
| R63_3 | mean(dY/dX) | 0.0217 | 0.0255 | 0.849 | 0.3961 | 1.3 | -0.0284 | 0.0717 | 7276 |
| R63_2 | mean(dY/dX) | 0.2945 | 0.0299 | 9.853 | <0.001 | 73.6 | 0.2359 | 0.3531 | 49171 |
| R63_4 | mean(dY/dX) | -0.0280 | 0.0268 | -1.047 | 0.2950 | 1.8 | -0.0805 | 0.0244 | 6989 |
| R97 | mean(Weiblich) - mean(Männlich) | -0.1193 | 0.0842 | -1.417 | 0.1563 | 2.7 | -0.2843 | 0.0457 | 74075 |
| R98 | mean(dY/dX) | 0.0008 | 0.0032 | 0.236 | 0.8130 | 0.3 | -0.0056 | 0.0071 | 42165 |
| R99 | mean(1) - mean(0) | 0.4190 | 0.3032 | 1.382 | 0.1670 | 2.6 | -0.1753 | 1.0133 | 951917 |
| R106 | mean(dY/dX) | 0.0381 | 0.0478 | 0.798 | 0.4248 | 1.2 | -0.0555 | 0.1317 | 7405 |
| R69 | mean(Duplexe/terraced house) - mean(Single family home) | -0.2251 | 0.0820 | -2.746 | 0.0060 | 7.4 | -0.3859 | -0.0644 | 169539 |
| R69 | mean(Apartment building) - mean(Single family home) | -0.0880 | 0.0991 | -0.888 | 0.3745 | 1.4 | -0.2822 | 0.1062 | 352314 |
| R70 | mean(dY/dX) | -0.0035 | 0.0011 | -3.150 | 0.0016 | 9.2 | -0.0057 | -0.0013 | 5750 |
| Modus | mean(1) - mean(2) | 0.3671 | 0.1036 | 3.545 | <0.001 | 11.3 | 0.1641 | 0.5701 | 909193 |
| Modus | mean(3) - mean(2) | 0.0389 | 0.1483 | 0.262 | 0.7933 | 0.3 | -0.2518 | 0.3295 | 935260 |
### Goodness of fit measures
# R^2
Nagelkerke_pois_all <- numeric(30)
for (i in 1:30) {
Nagelkerke_pois_all_res <- c(DescTools::PseudoR2(pois_multiimp$analyses[[i]], which = "Nagelkerke"))
Nagelkerke_pois_all[i] <- Nagelkerke_pois_all_res
}
median(Nagelkerke_pois_all)
## [1] 0.5213463
# BIC
BIC_pois_all <- numeric(30)
for (i in 1:30) {
BIC_pois_all_res <- c(BIC(pois_multiimp$analyses[[i]]))
BIC_pois_all[i] <- BIC_pois_all_res
}
median(BIC_pois_all)
## [1] 5458.047
Check if assumptions are met
### Test Assumptions with residual diagnostics
sim_fmp <- simulateResiduals(pois_multiimp$analyses[[5]], nSim = 1000)
# under- and overdispersion --> not sign.
# ratio close to 1 --> Poisson model fits well to the data, no over-/underdispersion
testDispersion(sim_fmp)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1, p-value = 0.9
## alternative hypothesis: two.sided
# Zero inflation --> not sign.
testZeroInflation(sim_fmp)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 1, p-value = 0.4
## alternative hypothesis: two.sided
# Heteroscedasticity --> not sign.
testQuantiles(sim_fmp)
##
## Test for location of quantiles via qgam
##
## data: simulationOutput
## p-value = 0.3
## alternative hypothesis: both
# KS test for correct distribution of residuals --> not sign.
testUniformity(sim_fmp)
## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.023, p-value = 0.4
## alternative hypothesis: two-sided
# test if outliers are a concern, bootstrap for integer-valued distributions --> not sign.
testOutliers(sim_fmp, type = "bootstrap")
##
## DHARMa bootstrapped outlier test
##
## data: sim_fmp
## outliers at both margin(s) = 2, observations = 1571, p-value = 0.1
## alternative hypothesis: two.sided
## percent confidence interval:
## 0.001273 0.006365
## sample estimates:
## outlier frequency (expected: 0.00350732017823043 )
## 0.001273
# fit poisson regression to each imputed data set
pois_multiimp_own <- with(KARE_MI,
glm(numadmeas ~ R104 + logR109 + rich + R71 + # generic capacity
R91 + R92 +
R90_1 + R90_5 +
R9 + R18 + R4a5 + R75 + # flood-specific capacity
R63_6 + R80a_1 + R63_1 +
R63_3 + R63_2 + R63_4 +
R97 + R98 + R99 + R106 + # CVs: gender, age, mig, hhsize
R69 + R70 + # CVs: house
Modus, # Survey design
family = 'poisson',
subset = (R7 == "Eigentum")))
# calculate cluster-robust AME
marginal_pois_own <- avg_slopes(pois_multiimp_own, vcov = ~town)
marginal_pois_own$order <- c(34, 35, 1, 2, 30, 14, 15, 16, 17, 18, 23,
25, 24, 26, 21, 32, 31, 33, 6, 20, 22, 13,
11, 12, 8, 10, 9, 27, 28, 29, 3, 4, 5)
marginal_pois_own <- marginal_pois_own %>%
mutate(across(c(3:5, 10:13), round, 4))
print(marginal_pois_own[order(marginal_pois_own$order, decreasing = F),], nrows = 40, style = "tinytable")
| Term | Contrast | Estimate | Std. Error | t | Pr(>|t|) | S | CI low | CI high | Df |
|---|---|---|---|---|---|---|---|---|---|
| Columns: term, contrast, estimate, std.error, s.value, predicted_lo, predicted_hi, predicted, df, statistic, p.value, conf.low, conf.high, order | |||||||||
| R104 | mean(intermediate secondary) - mean(no / lower secondary) | 0.5488 | 0.1704 | 3.221 | 0.0013 | 9.6 | 0.2148 | 0.8828 | 30622 |
| R104 | mean(upper secondary) - mean(no / lower secondary) | 0.3821 | 0.1518 | 2.517 | 0.0119 | 6.4 | 0.0845 | 0.6797 | 11713 |
| logR109 | mean(dY/dX) | -0.1505 | 0.1684 | -0.894 | 0.3717 | 1.4 | -0.4809 | 0.1800 | 975 |
| rich | mean(poor) - mean(middle) | -0.0724 | 0.2286 | -0.317 | 0.7514 | 0.4 | -0.5208 | 0.3760 | 1603 |
| rich | mean(rich) - mean(middle) | -0.2337 | 0.1944 | -1.202 | 0.2296 | 2.1 | -0.6152 | 0.1478 | 982 |
| R71 | mean(dY/dX) | 0.0005 | 0.0007 | 0.843 | 0.3991 | 1.3 | -0.0007 | 0.0018 | 4457 |
| R91 | mean(dY/dX) | -0.0076 | 0.0028 | -2.709 | 0.0068 | 7.2 | -0.0131 | -0.0021 | 94356 |
| R92 | mean(medium-term) - mean(unsure/short-term) | 0.2981 | 0.2317 | 1.287 | 0.1981 | 2.3 | -0.1559 | 0.7522 | 648346 |
| R92 | mean(long-term) - mean(unsure/short-term) | 0.0762 | 0.1716 | 0.444 | 0.6572 | 0.6 | -0.2602 | 0.4125 | 464932 |
| R90_1 | mean(dY/dX) | 0.1152 | 0.0553 | 2.084 | 0.0371 | 4.8 | 0.0069 | 0.2235 | 40891 |
| R90_5 | mean(dY/dX) | 0.0832 | 0.0480 | 1.732 | 0.0834 | 3.6 | -0.0110 | 0.1774 | 24310 |
| R9 | mean(dY/dX) | 0.1027 | 0.0381 | 2.695 | 0.0070 | 7.2 | 0.0280 | 0.1774 | 47289 |
| R18 | mean(Eher nicht wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.6614 | 0.1423 | 4.648 | <0.001 | 18.2 | 0.3825 | 0.9403 | 3285270 |
| R18 | mean(Eher wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.6558 | 0.1582 | 4.145 | <0.001 | 14.8 | 0.3457 | 0.9659 | 208614 |
| R18 | mean(Sehr wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.8061 | 0.1940 | 4.155 | <0.001 | 14.9 | 0.4259 | 1.1863 | 64949 |
| R4a5 | mean(damage) - mean(none) | 0.5308 | 0.1355 | 3.916 | <0.001 | 13.4 | 0.2651 | 0.7964 | 1753203 |
| R4a5 | mean(experience) - mean(none) | 0.1012 | 0.1430 | 0.707 | 0.4794 | 1.1 | -0.1792 | 0.3815 | 537994 |
| R75 | mean(3) - mean(1) | -0.1777 | 0.1055 | -1.685 | 0.0919 | 3.4 | -0.3844 | 0.0290 | 70146 |
| R63_6 | mean(dY/dX) | -0.0722 | 0.0466 | -1.550 | 0.1211 | 3.0 | -0.1635 | 0.0191 | 4552 |
| R80a_1 | mean(Rather yes/yes) - mean(Rather no/no) | 0.1801 | 0.0984 | 1.830 | 0.0673 | 3.9 | -0.0128 | 0.3730 | 47986 |
| R63_1 | mean(dY/dX) | -0.0182 | 0.0321 | -0.566 | 0.5711 | 0.8 | -0.0811 | 0.0447 | 6516 |
| R63_3 | mean(dY/dX) | 0.0096 | 0.0364 | 0.265 | 0.7914 | 0.3 | -0.0618 | 0.0811 | 9416 |
| R63_2 | mean(dY/dX) | 0.3430 | 0.0398 | 8.615 | <0.001 | 56.9 | 0.2650 | 0.4210 | 57378 |
| R63_4 | mean(dY/dX) | -0.0457 | 0.0339 | -1.348 | 0.1778 | 2.5 | -0.1123 | 0.0208 | 8429 |
| R97 | mean(Weiblich) - mean(Männlich) | -0.1452 | 0.1100 | -1.320 | 0.1870 | 2.4 | -0.3609 | 0.0705 | 66134 |
| R98 | mean(dY/dX) | 0.0005 | 0.0043 | 0.121 | 0.9033 | 0.1 | -0.0079 | 0.0089 | 29523 |
| R99 | mean(1) - mean(0) | 0.7031 | 0.4718 | 1.490 | 0.1362 | 2.9 | -0.2217 | 1.6279 | 1224024 |
| R106 | mean(dY/dX) | 0.0386 | 0.0658 | 0.587 | 0.5571 | 0.8 | -0.0903 | 0.1675 | 7922 |
| R69 | mean(Duplexe/terraced house) - mean(Single family home) | -0.3110 | 0.1022 | -3.043 | 0.0023 | 8.7 | -0.5112 | -0.1107 | 143899 |
| R69 | mean(Apartment building) - mean(Single family home) | -0.1000 | 0.1322 | -0.756 | 0.4495 | 1.2 | -0.3592 | 0.1592 | 425008 |
| R70 | mean(dY/dX) | -0.0051 | 0.0015 | -3.401 | <0.001 | 10.5 | -0.0080 | -0.0022 | 10928 |
| Modus | mean(1) - mean(2) | 0.4447 | 0.1337 | 3.326 | <0.001 | 10.1 | 0.1827 | 0.7068 | 868049 |
| Modus | mean(3) - mean(2) | 0.1198 | 0.1731 | 0.692 | 0.4889 | 1.0 | -0.2195 | 0.4591 | 419458 |
### Goodness of fit measures
# R^2
Nagelkerke_pois_own <- numeric(30)
for (i in 1:30) {
Nagelkerke_pois_own_res <- c(DescTools::PseudoR2(pois_multiimp_own$analyses[[i]], which = "Nagelkerke"))
Nagelkerke_pois_own[i] <- Nagelkerke_pois_own_res
}
median(Nagelkerke_pois_own)
## [1] 0.286602
# BIC
BIC_pois_own <- numeric(30)
for (i in 1:30) {
BIC_pois_own_res <- c(BIC(pois_multiimp_own$analyses[[i]]))
BIC_pois_own[i] <- BIC_pois_own_res
}
median(BIC_pois_own)
## [1] 4517.919
Check if logistic regression assumptions are met
### Test Assumptions with residual diagnostics
sim_fmp_own <- simulateResiduals(pois_multiimp_own$analyses[[16]], nSim = 1000)
# under- and overdispersion --> not. sign.
testDispersion(sim_fmp_own)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1, p-value = 1
## alternative hypothesis: two.sided
# Zero inflation --> not. sign.
testZeroInflation(sim_fmp_own)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 1, p-value = 1
## alternative hypothesis: two.sided
# Heteroscedasticity --> not. sign.
testQuantiles(sim_fmp_own)
##
## Test for location of quantiles via qgam
##
## data: simulationOutput
## p-value = 1
## alternative hypothesis: both
# KS test for correct distribution of residuals --> not. sign.
testUniformity(sim_fmp_own)
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.019, p-value = 0.8
## alternative hypothesis: two-sided
# test if outliers are a concern, bootstrap for integer-valued distributions
# sign., but not a problem (no underdispersion, to few outliers)
testOutliers(sim_fmp_own, type = "bootstrap")
##
## DHARMa bootstrapped outlier test
##
## data: sim_fmp_own
## outliers at both margin(s) = 0, observations = 1157, p-value
## <0.0000000000000002
## alternative hypothesis: two.sided
## percent confidence interval:
## 0.0008643 0.0069144
## sample estimates:
## outlier frequency (expected: 0.00399308556611927 )
## 0
# fit poisson regression to each imputed data set
pois_multiimp_ten <- with(KARE_MI,
glm(numadmeas ~ R104 + logR109 + rich + R71 + # generic capacity
R91 + R92 +
R90_1 + R90_5 +
R9 + R18 + R4a5 + R75 + # flood-specific capacity
R63_6 + R80a_1 + R63_1 +
R63_3 + R63_2 + R63_4 +
R97 + R98 + R99 + R106 + # CVs: gender, age, mig, hhsize
R69 + R70 + # CVs: house
Modus, # Survey design
family = 'poisson',
subset = (R7 == "Miete")))
# calculate cluster-robust AME
marginal_pois_ten <- avg_slopes(pois_multiimp_ten, vcov = ~town)
marginal_pois_ten$order <- c(34, 35, 1, 2, 30, 14, 15, 16, 17, 18, 23,
25, 24, 26, 21, 32, 31, 33, 6, 19, 20, 22, 13,
11, 12, 8, 10, 9, 27, 28, 29, 3, 4, 5)
marginal_pois_ten <- marginal_pois_ten %>%
mutate(across(c(3:5, 10:13), round, 4))
print(marginal_pois_ten[order(marginal_pois_ten$order, decreasing = F),], nrows = 40, style = "tinytable")
| Term | Contrast | Estimate | Std. Error | t | Pr(>|t|) | S | CI low | CI high | Df |
|---|---|---|---|---|---|---|---|---|---|
| Columns: term, contrast, estimate, std.error, s.value, predicted_lo, predicted_hi, predicted, df, statistic, p.value, conf.low, conf.high, order | |||||||||
| R104 | mean(intermediate secondary) - mean(no / lower secondary) | -0.1163 | 0.1818 | -0.6397 | 0.5223 | 0.9 | -0.4725 | 0.2400 | 153804 |
| R104 | mean(upper secondary) - mean(no / lower secondary) | -0.0848 | 0.1638 | -0.5179 | 0.6045 | 0.7 | -0.4058 | 0.2362 | 56476 |
| logR109 | mean(dY/dX) | 0.0067 | 0.1436 | 0.0470 | 0.9625 | 0.1 | -0.2749 | 0.2884 | 2153 |
| rich | mean(poor) - mean(middle) | -0.1583 | 0.1490 | -1.0620 | 0.2883 | 1.8 | -0.4505 | 0.1340 | 2596 |
| rich | mean(rich) - mean(middle) | 0.0789 | 0.2205 | 0.3576 | 0.7207 | 0.5 | -0.3534 | 0.5111 | 6087 |
| R71 | mean(dY/dX) | 0.0000 | 0.0009 | -0.0507 | 0.9596 | 0.1 | -0.0018 | 0.0017 | 2489 |
| R91 | mean(dY/dX) | -0.0004 | 0.0024 | -0.1447 | 0.8850 | 0.2 | -0.0051 | 0.0044 | 179497 |
| R92 | mean(medium-term) - mean(unsure/short-term) | 0.0480 | 0.0983 | 0.4883 | 0.6254 | 0.7 | -0.1447 | 0.2406 | 38158 |
| R92 | mean(long-term) - mean(unsure/short-term) | 0.1217 | 0.1259 | 0.9670 | 0.3335 | 1.6 | -0.1250 | 0.3684 | 174054 |
| R90_1 | mean(dY/dX) | 0.0648 | 0.0392 | 1.6521 | 0.0985 | 3.3 | -0.0121 | 0.1417 | 81401 |
| R90_5 | mean(dY/dX) | -0.0545 | 0.0401 | -1.3584 | 0.1744 | 2.5 | -0.1332 | 0.0242 | 107921 |
| R9 | mean(dY/dX) | 0.0047 | 0.0349 | 0.1339 | 0.8935 | 0.2 | -0.0637 | 0.0730 | 190821 |
| R18 | mean(Eher nicht wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.3330 | 0.1388 | 2.3991 | 0.0164 | 5.9 | 0.0609 | 0.6050 | 180606 |
| R18 | mean(Eher wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.4499 | 0.1608 | 2.7983 | 0.0051 | 7.6 | 0.1348 | 0.7651 | 188412 |
| R18 | mean(Sehr wahrscheinlich) - mean(Überhaupt nicht wahrscheinlich) | 0.4293 | 0.1739 | 2.4691 | 0.0135 | 6.2 | 0.0885 | 0.7701 | 116182 |
| R4a5 | mean(damage) - mean(none) | 0.7270 | 0.1940 | 3.7466 | <0.001 | 12.4 | 0.3467 | 1.1073 | 109266 |
| R4a5 | mean(experience) - mean(none) | 0.0861 | 0.1229 | 0.7005 | 0.4836 | 1.0 | -0.1548 | 0.3271 | 379704 |
| R75 | mean(2) - mean(1) | -0.0959 | 0.2426 | -0.3953 | 0.6927 | 0.5 | -0.5713 | 0.3796 | 92878 |
| R75 | mean(3) - mean(1) | 0.1361 | 0.2569 | 0.5297 | 0.5964 | 0.7 | -0.3675 | 0.6397 | 139055 |
| R63_6 | mean(dY/dX) | -0.0792 | 0.0375 | -2.1115 | 0.0348 | 4.8 | -0.1528 | -0.0057 | 5408 |
| R80a_1 | mean(Rather yes/yes) - mean(Rather no/no) | 0.0162 | 0.1056 | 0.1532 | 0.8782 | 0.2 | -0.1907 | 0.2231 | 81567 |
| R63_1 | mean(dY/dX) | 0.0084 | 0.0322 | 0.2617 | 0.7936 | 0.3 | -0.0547 | 0.0715 | 17741 |
| R63_3 | mean(dY/dX) | 0.0393 | 0.0295 | 1.3308 | 0.1833 | 2.4 | -0.0186 | 0.0971 | 37636 |
| R63_2 | mean(dY/dX) | 0.1483 | 0.0236 | 6.2785 | <0.001 | 31.4 | 0.1020 | 0.1946 | 34464 |
| R63_4 | mean(dY/dX) | -0.0217 | 0.0293 | -0.7392 | 0.4598 | 1.1 | -0.0791 | 0.0358 | 5213 |
| R97 | mean(Weiblich) - mean(Männlich) | -0.0457 | 0.0904 | -0.5055 | 0.6132 | 0.7 | -0.2230 | 0.1316 | 111983 |
| R98 | mean(dY/dX) | -0.0031 | 0.0030 | -1.0392 | 0.2987 | 1.7 | -0.0089 | 0.0027 | 33168 |
| R99 | mean(1) - mean(0) | -0.1160 | 0.1756 | -0.6610 | 0.5086 | 1.0 | -0.4601 | 0.2280 | 1294028 |
| R106 | mean(dY/dX) | 0.0074 | 0.0468 | 0.1584 | 0.8741 | 0.2 | -0.0843 | 0.0991 | 4048 |
| R69 | mean(Duplexe/terraced house) - mean(Single family home) | 0.1356 | 0.1188 | 1.1413 | 0.2538 | 2.0 | -0.0973 | 0.3685 | 26155 |
| R69 | mean(Apartment building) - mean(Single family home) | 0.0511 | 0.1025 | 0.4989 | 0.6178 | 0.7 | -0.1498 | 0.2520 | 56630 |
| R70 | mean(dY/dX) | 0.0006 | 0.0015 | 0.3981 | 0.6907 | 0.5 | -0.0023 | 0.0034 | 513 |
| Modus | mean(1) - mean(2) | 0.2705 | 0.1694 | 1.5969 | 0.1103 | 3.2 | -0.0615 | 0.6025 | 229449 |
| Modus | mean(3) - mean(2) | -0.1078 | 0.1216 | -0.8869 | 0.3751 | 1.4 | -0.3461 | 0.1305 | 691525 |
### Goodness of fit measures
# R^2
Nagelkerke_pois_ten <- numeric(30)
for (i in 1:30) {
Nagelkerke_pois_ten_res <- c(DescTools::PseudoR2(pois_multiimp_ten$analyses[[i]], which = "Nagelkerke"))
Nagelkerke_pois_ten[i] <- Nagelkerke_pois_ten_res
}
median(Nagelkerke_pois_ten)
## [1] 0.3301193
# BIC
BIC_pois_ten <- numeric(30)
for (i in 1:30) {
BIC_pois_ten_res <- c(BIC(pois_multiimp_ten$analyses[[i]]))
BIC_pois_ten[i] <- BIC_pois_ten_res
}
median(BIC_pois_ten)
## [1] 1070.011
Check if logistic regression assumptions are met
### Test Assumptions with residual diagnostics
sim_fmp_ten <- simulateResiduals(pois_multiimp_ten$analyses[[16]], nSim = 1000)
# under- and overdispersion --> not sign.
testDispersion(sim_fmp_ten)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.88, p-value = 0.2
## alternative hypothesis: two.sided
# Zero inflation --> not sign.
testZeroInflation(sim_fmp_ten)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0.99, p-value = 0.8
## alternative hypothesis: two.sided
# Heteroscedasticity --> not sign.
testQuantiles(sim_fmp_ten)
##
## Test for location of quantiles via qgam
##
## data: simulationOutput
## p-value = 0.4
## alternative hypothesis: both
# KS test for correct distribution of residuals --> not sign.
testUniformity(sim_fmp_ten)
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.041, p-value = 0.5
## alternative hypothesis: two-sided
# test if outliers are a concern, bootstrap for integer-valued distributions --> not sign.
testOutliers(sim_fmp_ten, type = "bootstrap")
##
## DHARMa bootstrapped outlier test
##
## data: sim_fmp_ten
## outliers at both margin(s) = 0, observations = 414, p-value = 0.5
## alternative hypothesis: two.sided
## percent confidence interval:
## 0.000000 0.009662
## sample estimates:
## outlier frequency (expected: 0.00282608695652174 )
## 0